library(knitr)
knitr::opts_chunk$set(cache = TRUE, warning = FALSE,
                      message = FALSE, cache.lazy = FALSE, cache.path = "../../gitignore/RmdCache/", # keep heavy data in gitignore cache
                      fig.path = "Rmdfig/")
machine="mythinkpad" # define the machine we work on
loadALL = TRUE # load all uniteCov objects
loadannot = TRUE # load genome annotations
sourceDMS = TRUE # Load the calculated DMS
sourceSubUnite = FALSE
source("R02.3_DATALOAD.R")
source("homebrewDMSannotation.R") # needed for annotation, slight modification of genomation

## So far, we don't use DMR (because RRBS single end, this is not super meaningful)
rm(DMRBPlist, DMRlist)

Data getting loaded:

1 Global methylation and fitness

Compare fitness traits between the different offsprings groups. Follow up of Sagonas 2020 & Ferre Ortega’s master’s dissertation

1.1 Calculate BCI

Kaufmann et al. 2014: Body condition of the G2 fish, an estimate of fish health and a predictor of energy reserves and reproductive success, was calculated using there residuals from the regression of body mass on body length (Chellappaet al.1995).

fullMetadata_OFFS$BCI <- residuals(lmer(Wnettofin ~ Slfin * Sex + (1|brotherPairID), data=fullMetadata_OFFS))

## and for parents (no sex difference, only males):
fullMetadata_PAR$BCI <- residuals(lmer(Wnettofin ~ Slfin + (1|brotherPairID), data=fullMetadata_PAR))

Effect of paternal treatment on body condition of offspring: Kaufmann et al. 2014: “To investigate in which way paternal G1 exposure affected offspring tolerance, we tested how the relationship between G2 body condition and infection intensity was affected by paternal G1 exposure. This was tested in a linear mixed model on G2 body condition with paternal G1 treatment and the interaction between paternal G1 treatment and G2 infection intensity as fixed effects. Maternal half-sibship identity was set as a random effect”

1.2 Effect of paternal exposure on tolerance

1.2.1 BCI per trt

Effect of treatment groups of offspring on body condition(Kaufmann et al. 2014): “The linear mixed effect model (nlme function in R) included G2 body condition as dependent variable, sex, G2 treatment (exposed vs. control), paternal G1 treatment (exposed vs. control) and their interactions as fixed effects as well as maternal G2 half-sibship identity as a random effect”

mod1 <- lme(BCI ~ offsTrt * patTrt, random=~1|brotherPairID,data=fullMetadata_OFFS)
anova(mod1) # strong significant effect of both offspring trt & paternal + interactions
##                numDF denDF   F-value p-value
## (Intercept)        1   100  0.000000  1.0000
## offsTrt            1   100 14.057746  0.0003
## patTrt             1   100 13.600342  0.0004
## offsTrt:patTrt     1   100  9.396573  0.0028
mod1.2 <- lme(BCI ~  trtG1G2, random=~1|brotherPairID,data=fullMetadata_OFFS)
## pairwise posthoc test
emmeans(mod1.2, list(pairwise ~ trtG1G2), adjust = "tukey")
## $`emmeans of trtG1G2`
##  trtG1G2    emmean   SE df lower.CL upper.CL
##  NE_control   16.6 10.8  7    -8.87     42.0
##  NE_exposed  -57.7 11.0  7   -83.63    -31.8
##  E_control    23.6 10.8  7    -1.85     49.0
##  E_exposed    15.5 10.8  7    -9.90     41.0
## 
## Degrees-of-freedom method: containment 
## Confidence level used: 0.95 
## 
## $`pairwise differences of trtG1G2`
##  1                       estimate   SE  df t.ratio p.value
##  NE_control - NE_exposed    74.29 15.3 100   4.840  <.0001
##  NE_control - E_control     -7.03 15.2 100  -0.462  0.9671
##  NE_control - E_exposed      1.03 15.2 100   0.067  0.9999
##  NE_exposed - E_control    -81.32 15.3 100  -5.298  <.0001
##  NE_exposed - E_exposed    -73.27 15.3 100  -4.773  <.0001
##  E_control - E_exposed       8.05 15.2 100   0.529  0.9517
## 
## Degrees-of-freedom method: containment 
## P value adjustment: tukey method for comparing a family of 4 estimates
## Control father - treatment offspring has a strongly significantly lower BC than
## every other group, same as Kaufmann et al. 2014

myplot1 <- ggplot(fullMetadata_OFFS, aes(x=trtG1G2, y = BCI, fill=trtG1G2))+
  geom_boxplot()+
  geom_signif(comparisons = list(c("NE_control", "NE_exposed")),
              map_signif_level=TRUE, annotations="***",
              y_position = 150, tip_length = 0, vjust=0.4) +
  geom_signif(comparisons = list(c("NE_exposed", "E_control")),
              map_signif_level=TRUE, annotations="***",
              y_position = 200, tip_length = 0, vjust=0.4) +
  geom_signif(comparisons = list(c("NE_exposed", "E_exposed")),
              map_signif_level=TRUE, annotations="***",
              y_position = 250, tip_length = 0, vjust=0.4) +
  scale_fill_manual(values = colOffs)+
  theme_bw() + theme(legend.position = "none") +
  ylab("Body Condition Index") +
  scale_x_discrete(labels=c("NE_control" = "G1 control\nG2 control", "NE_exposed" = "G1 control\nG2 infected",
                            "E_control" = "G1 infected\nG2 control", "E_exposed" = "G1 infected\nG2 infected"),
                   name = NULL)
myplot1

1.2.2 Tolerance (slope of BCI on worms count) per paternal status

mod_Tol <- lmer(BCI ~ No.Worms*PAT + (1|brotherPairID)+ (1|Sex), data=fullMetadata_OFFS, REML = F)

## Model selection:
step(mod_Tol, reduce.random = F) # Model found: full model
## Backward reduced random-effect table:
## 
##                     Eliminated npar  logLik    AIC LRT Df Pr(>Chisq)
## <none>                            7 -608.33 1230.7                  
## (1 | brotherPairID)          0    6 -608.33 1228.7   0  1          1
## (1 | Sex)                    0    6 -608.33 1228.7   0  1          1
## 
## Backward reduced fixed-effect table:
## Degrees of freedom method: Satterthwaite 
## 
##              Eliminated Sum Sq Mean Sq NumDF DenDF F value  Pr(>F)  
## No.Worms:PAT          0  20660   20660     1   111  6.1283 0.01481 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Model found:
## BCI ~ No.Worms * PAT + (1 | brotherPairID) + (1 | Sex)
## The slope of BCI on nbrworms varies upon treatment
plot(ggpredict(mod_Tol, terms = c("No.Worms", "PAT")), add.data=T)+ theme_bw() +
  ylab("Body Condition Index") + xlab("Number of worms") +
  ggtitle("Predicted values of Body Condition Index in offspring")+
  scale_color_manual(NULL, values = c("black", "red")) +
  scale_fill_manual(NULL, values = c("black", "red"))  +
  scale_x_continuous(breaks = 0:10)+
  geom_point(size=0)+ # to have color key in legend as point
  guides(colour = guide_legend(override.aes = list(size=3,linetype=0, fill = NA)))

1.4 Nbr/Ratio of Methylated Sites in different groups

We kept in total 135 samples. On average, 11.27 [+/- 0.33] million reads were sequenced. The average mapping efficiency was 85% [+/- 0.48%].

The mean coverage per CpG site in the full dataset, considering positions covered in all fish, is 83 [+/- 2.21].

For G1 only: We kept in total 24 samples. On average, 11.16 [+/- 0.67] million reads were sequenced. The average mapping efficiency was # 84% [+/- 1.39%].

The mean coverage per CpG site in G1, considering positions shared by at least 6 fish per treatment group (half individuals per group), and which overlap with positions retained in G2, is 46 [+/- 1.71].

For G2 only: We kept in total 111 samples. On average, 11.29 [+/- 0.38] million reads were sequenced. The average mapping efficiency was 86% [+/- 0.47%].

The mean coverage per CpG site in G2, considering positions shared by at least 14 fish per treatment group (half individuals per group), and which overlap with positions retained in G1, is 47 [+/- 0.76].

1.5 Choice of global methylation value

cor.test(fullMetadata_PAR$Nbr_coveredCpG,
         fullMetadata_PAR$Nbr_methCpG, method = "spearman")
## 
##  Spearman's rank correlation rho
## 
## data:  fullMetadata_PAR$Nbr_coveredCpG and fullMetadata_PAR$Nbr_methCpG
## S = 350, p-value = 2.153e-06
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
##       rho 
## 0.8478261
## S = 350, p-value = 2.15e-06, rho = 0.85
ggplot(fullMetadata_PAR, aes(x=Nbr_coveredCpG, y=Nbr_methCpG))+
  geom_smooth(method = "lm", col="black")+
  geom_point(aes(col=trtG1G2), size = 3)+ scale_color_manual(values = c("grey", "red")) +
  theme_bw() + ggtitle(label = "Parents, CpG shared by half fish/trt")

## Check after RMS correction for coverage bias: CORRECTED (p-value = 0.4485)
cor.test(fullMetadata_PAR$Nbr_coveredCpG,
         fullMetadata_PAR$RMS_coveredCpG, method = "spearman")
## 
##  Spearman's rank correlation rho
## 
## data:  fullMetadata_PAR$Nbr_coveredCpG and fullMetadata_PAR$RMS_coveredCpG
## S = 2712, p-value = 0.4006
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
##        rho 
## -0.1791304
ggplot(fullMetadata_PAR, aes(x=Nbr_coveredCpG, y=RMS_coveredCpG))+
  geom_smooth(method = "lm", col="black")+
  geom_point(aes(col=trtG1G2), size = 3)+ scale_color_manual(values = c("grey", "red")) +
  theme_bw() + ggtitle(label = "Parents, CpG shared by half fish/trt")

## and with residuals: COMPLETELY CORRECTED p-value = 0.9562
cor.test(fullMetadata_PAR$Nbr_coveredCpG,
         fullMetadata_PAR$res_Nbr_methCpG_Nbr_coveredCpG, method = "spearman")
## 
##  Spearman's rank correlation rho
## 
## data:  fullMetadata_PAR$Nbr_coveredCpG and fullMetadata_PAR$res_Nbr_methCpG_Nbr_coveredCpG
## S = 2308, p-value = 0.9886
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
##          rho 
## -0.003478261
ggplot(fullMetadata_PAR, aes(x=Nbr_coveredCpG, y=res_Nbr_methCpG_Nbr_coveredCpG))+
  geom_smooth(method = "lm", col="black")+
  geom_point(aes(col=trtG1G2), size = 3)+ scale_color_manual(values = c("grey", "red")) +
  theme_bw() + ggtitle(label = "Parents, CpG shared by half fish/trt")

############
## Offspring:
cor.test(fullMetadata_OFFS$Nbr_coveredCpG,
         fullMetadata_OFFS$Nbr_methCpG, method = "spearman")
## 
##  Spearman's rank correlation rho
## 
## data:  fullMetadata_OFFS$Nbr_coveredCpG and fullMetadata_OFFS$Nbr_methCpG
## S = 20254, p-value < 2.2e-16
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
##       rho 
## 0.9111355
## S = 20254, p-value < 2.2e-16 rho = 0.91
ggplot(fullMetadata_OFFS, aes(x=Nbr_coveredCpG, y=Nbr_methCpG))+
  geom_smooth(method = "lm", col="black")+
  geom_point(aes(col=trtG1G2), size = 3)+ scale_color_manual(values = colOffs) +
  scale_x_continuous("Number of cytosines covered") +
  scale_y_continuous("Number of methylated cytosines") +
  theme_bw() + ggtitle(label = "Offspring, CpG shared by half fish/trt")

## Plot distance to residuals:
fit <- lm(Nbr_methCpG ~ Nbr_coveredCpG, data = fullMetadata_OFFS)
plotdf <- fullMetadata_OFFS
plotdf$predicted <- predict(fit)   # Save the predicted values
plotdf$residuals <- residuals(fit)
ggplot(plotdf, aes(x=Nbr_coveredCpG, y=Nbr_methCpG))+
  geom_smooth(method = "lm", col="black")+
  geom_segment(aes(xend = Nbr_coveredCpG, yend = predicted), col = "grey") +
  geom_point(aes(col=trtG1G2), size = 3)+ scale_color_manual(values = colOffs) +
  scale_x_continuous("Number of cytosines covered") +
  scale_y_continuous("Number of methylated cytosines") +
  theme_bw() + ggtitle(label = "Offspring, CpG shared by half fish/trt")

## Check after RMS correction for coverage bias: SEMI CORRECTED (p-value = 0.01, rho = -0.24)
cor.test(fullMetadata_OFFS$Nbr_coveredCpG,
         fullMetadata_OFFS$RMS_coveredCpG, method = "spearman")
## 
##  Spearman's rank correlation rho
## 
## data:  fullMetadata_OFFS$Nbr_coveredCpG and fullMetadata_OFFS$RMS_coveredCpG
## S = 282466, p-value = 0.01158
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
##        rho 
## -0.2393208
ggplot(fullMetadata_OFFS, aes(x=Nbr_coveredCpG, y=RMS_coveredCpG))+
  geom_point(aes(col=trtG1G2), size = 3)+ scale_color_manual(values = colOffs) +
  geom_smooth(method = "lm", col="black")+
  theme_bw() + ggtitle(label = "Offspring, CpG shared by half fish/trt")

## and with residuals: COMPLETELY CORRECTED p-value = 0.51
cor.test(fullMetadata_OFFS$Nbr_coveredCpG,
         fullMetadata_OFFS$res_Nbr_methCpG_Nbr_coveredCpG, method = "spearman")
## 
##  Spearman's rank correlation rho
## 
## data:  fullMetadata_OFFS$Nbr_coveredCpG and fullMetadata_OFFS$res_Nbr_methCpG_Nbr_coveredCpG
## S = 213674, p-value = 0.514
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
##        rho 
## 0.06250439
ggplot(fullMetadata_OFFS, aes(x=Nbr_coveredCpG, y=res_Nbr_methCpG_Nbr_coveredCpG))+
  geom_point(aes(col=trtG1G2), size = 3)+ scale_color_manual(values = colOffs) +
  geom_smooth(method = "lm", col="black")+
  scale_x_continuous("Number of cytosines covered") +
  scale_y_continuous("Residuals of number of methylated cytosines\n on number of cytosines covered") +
  theme_bw() + ggtitle(label = "Offspring, CpG shared by half fish/trt")

1.6 Why we should we correct for sex

1.6.1 No difference in mappability (p>0.05)

mod = lm(MappingEfficiency.BSBoldvsGynogen ~ Sex, data = fullMetadata_OFFS)
summary(step(mod))
## Start:  AIC=203.17
## MappingEfficiency.BSBoldvsGynogen ~ Sex
## 
##        Df Sum of Sq    RSS    AIC
## <none>              667.74 203.18
## - Sex   1    22.563 690.30 204.86
## 
## Call:
## lm(formula = MappingEfficiency.BSBoldvsGynogen ~ Sex, data = fullMetadata_OFFS)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -6.9847 -1.5030  0.3133  2.0418  4.0823 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  86.3134     0.3337 258.624   <2e-16 ***
## SexM         -0.9017     0.4699  -1.919   0.0576 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.475 on 109 degrees of freedom
## Multiple R-squared:  0.03269,    Adjusted R-squared:  0.02381 
## F-statistic: 3.683 on 1 and 109 DF,  p-value: 0.05758
plot(ggpredict(mod, terms = c("Sex")), add.data = T)

NB: this is WITH unknown and sex chromosomes, before filtering.

1.6.2 No difference in number of reads (p>0.05)

mod = lm(M.Seqs_rawReads ~ Sex, data = fullMetadata_OFFS)
summary(step(mod))
## Start:  AIC=158.08
## M.Seqs_rawReads ~ Sex
## 
##        Df Sum of Sq    RSS    AIC
## - Sex   1    3.8569 448.66 157.04
## <none>              444.80 158.08
## 
## Step:  AIC=157.04
## M.Seqs_rawReads ~ 1
## 
## Call:
## lm(formula = M.Seqs_rawReads ~ 1, data = fullMetadata_OFFS)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -4.4901 -1.1901 -0.2901  0.8599  8.8099 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  11.2901     0.1917    58.9   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.02 on 110 degrees of freedom
plot(ggpredict(mod, terms = c("Sex")), add.data = T)

NB: this is WITH unknown and sex chromosomes, before filtering.

1.6.3 No difference in mean coverage per CpG in the filtered dataset (p>0.05)

mod = lm(MeanCoverage ~ Sex, data = fullMetadata_OFFS)
summary(step(mod))
## Start:  AIC=314.87
## MeanCoverage ~ Sex
## 
##        Df Sum of Sq    RSS    AIC
## - Sex   1    4.4425 1831.0 313.14
## <none>              1826.5 314.87
## 
## Step:  AIC=313.14
## MeanCoverage ~ 1
## 
## Call:
## lm(formula = MeanCoverage ~ 1, data = fullMetadata_OFFS)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -6.8662 -2.4123 -0.6565  1.7138 15.0594 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  46.8858     0.3872   121.1   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 4.08 on 110 degrees of freedom
plot(ggpredict(mod, terms = c("Sex")), add.data = T)

NB: this is in G2, considering positions shared by at least 14 fish per treatment group (half individuals per group), and which overlap with positions retained in G1, without sex and unknown chromosome (after filtering)

1.6.4 No difference in number of sites covered in the filtered dataset (p>0.05)

mod = lm(Nbr_coveredCpG ~ Sex, data = fullMetadata_OFFS)
summary(step(mod))
## Start:  AIC=2579.96
## Nbr_coveredCpG ~ Sex
## 
##        Df  Sum of Sq        RSS    AIC
## - Sex   1 1.9317e+10 1.3495e+12 2579.6
## <none>               1.3302e+12 2580.0
## 
## Step:  AIC=2579.56
## Nbr_coveredCpG ~ 1
## 
## Call:
## lm(formula = Nbr_coveredCpG ~ 1, data = fullMetadata_OFFS)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -338266  -58536   26350   82661  139983 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   853897      10513   81.22   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 110800 on 110 degrees of freedom
plot(ggpredict(mod, terms = c("Sex")), add.data = T)

NB: this is in G2, considering positions shared by at least 14 fish per treatment group (half individuals per group), and which overlap with positions retained in G1, without sex and unknown chromosome (after filtering)

1.6.5 No difference in number of sites covered in the filtered dataset (p>0.05)

mod = lm(OverallPercentageMethylation ~ Sex, data = fullMetadata_OFFS)
summary(step(mod))
## Start:  AIC=154.63
## OverallPercentageMethylation ~ Sex
## 
##        Df Sum of Sq    RSS    AIC
## <none>              431.18 154.63
## - Sex   1    12.428 443.61 155.78
## 
## Call:
## lm(formula = OverallPercentageMethylation ~ Sex, data = fullMetadata_OFFS)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -2.9467 -1.2674 -0.4101  0.6060 10.3697 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  56.1035     0.2682 209.196   <2e-16 ***
## SexM         -0.6692     0.3776  -1.772   0.0791 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.989 on 109 degrees of freedom
## Multiple R-squared:  0.02801,    Adjusted R-squared:  0.0191 
## F-statistic: 3.142 on 1 and 109 DF,  p-value: 0.07911
plot(ggpredict(mod, terms = c("Sex")), add.data = T)

NB: this is in G2, considering positions shared by at least 14 fish per treatment group (half individuals per group), and which overlap with positions retained in G1, without sex and unknown chromosome (after filtering)

1.6.6 Males have a lower global methylation than females (residuals of nbr of methylated sites by nbr of sites covered)

mod = lm(res_Nbr_methCpG_Nbr_coveredCpG ~ Sex, data = fullMetadata_OFFS)
summary(step(mod)) # sex is significant p = 0.000157 ***
## Start:  AIC=2165.93
## res_Nbr_methCpG_Nbr_coveredCpG ~ Sex
## 
##        Df  Sum of Sq        RSS    AIC
## <none>               3.1917e+10 2165.9
## - Sex   1 4493411296 3.6411e+10 2178.6
## 
## Call:
## lm(formula = res_Nbr_methCpG_Nbr_coveredCpG ~ Sex, data = fullMetadata_OFFS)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -38952 -10289   -519  10434  45882 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)     6420       2307   2.782 0.006361 ** 
## SexM          -12726       3248  -3.917 0.000157 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 17110 on 109 degrees of freedom
## Multiple R-squared:  0.1234, Adjusted R-squared:  0.1154 
## F-statistic: 15.35 on 1 and 109 DF,  p-value: 0.0001565
anova(mod)
## Analysis of Variance Table
## 
## Response: res_Nbr_methCpG_Nbr_coveredCpG
##            Df     Sum Sq    Mean Sq F value    Pr(>F)    
## Sex         1 4.4934e+09 4493411296  15.345 0.0001565 ***
## Residuals 109 3.1917e+10  292820190                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
plot(ggpredict(mod, terms = c("Sex")), add.data = T) +
  xlab(NULL)+
  ylab("Residuals of N methylated sites on N covered sites") +
  ggtitle("Predicted values of global methylation in offspring")

1.7 Are mean residuals meth sites different following tolerance slope?

1.7.1 In all offspring

fullMetadata_OFFS$res_Nbr_methCpG_Nbr_coveredCpG_div1000 <- (fullMetadata_OFFS$res_Nbr_methCpG_Nbr_coveredCpG)/1000

mod_Tol.Meth <- lmer(BCI ~ res_Nbr_methCpG_Nbr_coveredCpG_div1000*No.Worms*PAT + (1|brotherPairID)+ (1|Sex),
                     data=fullMetadata_OFFS, REML = F)

## Model selection:
step(mod_Tol.Meth, reduce.random = F) # Model found: BCI ~ No.Worms + PAT + (1 | brotherPairID) + (1 | Sex) + No.Worms:PAT
## Backward reduced random-effect table:
## 
##                     Eliminated npar  logLik    AIC LRT Df Pr(>Chisq)
## <none>                           11 -606.89 1235.8                  
## (1 | brotherPairID)          0   10 -606.89 1233.8   0  1          1
## (1 | Sex)                    0   10 -606.89 1233.8   0  1          1
## 
## Backward reduced fixed-effect table:
## Degrees of freedom method: Satterthwaite 
## 
##                                                     Eliminated  Sum Sq Mean Sq
## res_Nbr_methCpG_Nbr_coveredCpG_div1000:No.Worms:PAT          1  2780.8  2780.8
## res_Nbr_methCpG_Nbr_coveredCpG_div1000:PAT                   2  2396.3  2396.3
## res_Nbr_methCpG_Nbr_coveredCpG_div1000:No.Worms              3  1829.8  1829.8
## res_Nbr_methCpG_Nbr_coveredCpG_div1000                       4  2571.6  2571.6
## No.Worms:PAT                                                 0 20659.8 20659.8
##                                                     NumDF DenDF F value  Pr(>F)
## res_Nbr_methCpG_Nbr_coveredCpG_div1000:No.Worms:PAT     1   111  0.8465 0.35953
## res_Nbr_methCpG_Nbr_coveredCpG_div1000:PAT              1   111  0.7240 0.39667
## res_Nbr_methCpG_Nbr_coveredCpG_div1000:No.Worms         1   111  0.5492 0.46020
## res_Nbr_methCpG_Nbr_coveredCpG_div1000                  1   111  0.7681 0.38270
## No.Worms:PAT                                            1   111  6.1283 0.01481
##                                                      
## res_Nbr_methCpG_Nbr_coveredCpG_div1000:No.Worms:PAT  
## res_Nbr_methCpG_Nbr_coveredCpG_div1000:PAT           
## res_Nbr_methCpG_Nbr_coveredCpG_div1000:No.Worms      
## res_Nbr_methCpG_Nbr_coveredCpG_div1000               
## No.Worms:PAT                                        *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Model found:
## BCI ~ No.Worms + PAT + (1 | brotherPairID) + (1 | Sex) + No.Worms:PAT
## The slope of BCI on nbrworms varies upon treatment but methylation does NOT vary with tolerance
mod_Tol.Meth <- lmer(BCI ~ No.Worms*PAT + (1|brotherPairID)+ (1|Sex),
                     data=fullMetadata_OFFS)

## And by treatment instead of No.worms?
mod_Tol.Meth2 <- lmer(BCI ~ res_Nbr_methCpG_Nbr_coveredCpG_div1000*PAT*outcome + (1|brotherPairID)+ (1|Sex),
                      data=fullMetadata_OFFS, REML = F)

## Model selection:
step(mod_Tol.Meth2, reduce.random = F) # Model found: BCI ~ PAT + outcome + (1 | brotherPairID) + (1 | Sex) + PAT:outcome
## Backward reduced random-effect table:
## 
##                     Eliminated npar  logLik    AIC LRT Df Pr(>Chisq)
## <none>                           11 -603.06 1228.1                  
## (1 | brotherPairID)          0   10 -603.06 1226.1   0  1          1
## (1 | Sex)                    0   10 -603.06 1226.1   0  1          1
## 
## Backward reduced fixed-effect table:
## Degrees of freedom method: Satterthwaite 
## 
##                                                    Eliminated  Sum Sq Mean Sq
## res_Nbr_methCpG_Nbr_coveredCpG_div1000:PAT:outcome          1  2156.6  2156.6
## res_Nbr_methCpG_Nbr_coveredCpG_div1000:outcome              2   494.6   494.6
## res_Nbr_methCpG_Nbr_coveredCpG_div1000:PAT                  3  1034.5  1034.5
## res_Nbr_methCpG_Nbr_coveredCpG_div1000                      4  2542.3  2542.3
## PAT:outcome                                                 0 30432.9 30432.9
##                                                    NumDF DenDF F value   Pr(>F)
## res_Nbr_methCpG_Nbr_coveredCpG_div1000:PAT:outcome     1   111  0.7034 0.403442
## res_Nbr_methCpG_Nbr_coveredCpG_div1000:outcome         1   111  0.1603 0.689644
## res_Nbr_methCpG_Nbr_coveredCpG_div1000:PAT             1   111  0.3348 0.564008
## res_Nbr_methCpG_Nbr_coveredCpG_div1000                 1   111  0.8203 0.367046
## PAT:outcome                                            1   111  9.7478 0.002289
##                                                      
## res_Nbr_methCpG_Nbr_coveredCpG_div1000:PAT:outcome   
## res_Nbr_methCpG_Nbr_coveredCpG_div1000:outcome       
## res_Nbr_methCpG_Nbr_coveredCpG_div1000:PAT           
## res_Nbr_methCpG_Nbr_coveredCpG_div1000               
## PAT:outcome                                        **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Model found:
## BCI ~ PAT + outcome + (1 | brotherPairID) + (1 | Sex) + PAT:outcome

The slope of BCI on nbr worms varies upon parental treatment, but methylation does NOT vary with tolerance

1.7.2 In exposed offspring only

## By group, tolerance slope as a function of methylation residuals:
modFULL <- lmer(BCI ~ res_Nbr_methCpG_Nbr_coveredCpG_div1000*No.Worms + (1|brotherPairID) + (1|Sex),
                data = fullMetadata_OFFS[fullMetadata_OFFS$trtG1G2 %in% c("NE_exposed", "E_exposed"),])
## Model selection:
step(modFULL, reduce.random = F) # Model found: BCI ~ (1 | brotherPairID) + (1 | Sex)
## Backward reduced random-effect table:
## 
##                     Eliminated npar  logLik    AIC      LRT Df Pr(>Chisq)
## <none>                            7 -293.63 601.26                       
## (1 | brotherPairID)          0    6 -293.63 599.26 0.000000  1     1.0000
## (1 | Sex)                    0    6 -293.66 599.32 0.067386  1     0.7952
## 
## Backward reduced fixed-effect table:
## Degrees of freedom method: Satterthwaite 
## 
##                                                 Eliminated Sum Sq Mean Sq NumDF
## res_Nbr_methCpG_Nbr_coveredCpG_div1000:No.Worms          1  362.1   362.1     1
## No.Worms                                                 2  577.4   577.4     1
## res_Nbr_methCpG_Nbr_coveredCpG_div1000                   3 8726.5  8726.5     1
##                                                  DenDF F value Pr(>F)
## res_Nbr_methCpG_Nbr_coveredCpG_div1000:No.Worms 49.047  0.1098 0.7418
## No.Worms                                        51.973  0.1776 0.6752
## res_Nbr_methCpG_Nbr_coveredCpG_div1000          37.954  2.7327 0.1066
## 
## Model found:
## BCI ~ (1 | brotherPairID) + (1 | Sex)
modFULL <- lmer(BCI ~ res_Nbr_methCpG_Nbr_coveredCpG_div1000*PAT + (1|brotherPairID) + (1|Sex),
                data = fullMetadata_OFFS[fullMetadata_OFFS$trtG1G2 %in% c("NE_exposed", "E_exposed"),])
## Model selection:
step(modFULL, reduce.random = F) # Model found: BCI ~ PAT + (1 | brotherPairID) + (1 | Sex)
## Backward reduced random-effect table:
## 
##                     Eliminated npar  logLik    AIC    LRT Df Pr(>Chisq)
## <none>                            7 -278.14 570.27                     
## (1 | brotherPairID)          0    6 -278.14 568.27 0.0000  1     1.0000
## (1 | Sex)                    0    6 -278.93 569.86 1.5904  1     0.2073
## 
## Backward reduced fixed-effect table:
## Degrees of freedom method: Satterthwaite 
## 
##                                            Eliminated Sum Sq Mean Sq NumDF
## res_Nbr_methCpG_Nbr_coveredCpG_div1000:PAT          1    504     504     1
## res_Nbr_methCpG_Nbr_coveredCpG_div1000              2   1476    1476     1
## PAT                                                 0  76908   76908     1
##                                             DenDF F value    Pr(>F)    
## res_Nbr_methCpG_Nbr_coveredCpG_div1000:PAT 50.451  0.2624    0.6107    
## res_Nbr_methCpG_Nbr_coveredCpG_div1000     51.749  0.7782    0.3818    
## PAT                                        47.592 41.0412 6.195e-08 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Model found:
## BCI ~ PAT + (1 | brotherPairID) + (1 | Sex)

1.8 PCA based on all methylation values in G2

# 1. get raw values
percmeth = percMethylation(uniteCov14_G2_woSexAndUnknowChrOVERLAP)

# Run PCA on complete data (CpG covered in all fish)
PCA_allpos <- myPCA(x = t(na.omit(percmeth)), incomplete = FALSE)
## [1] "The chosen model is:"
## BCI ~ No.Worms + PAT + (1 | brotherPairID) + (1 | Sex) + No.Worms:PAT
## <environment: 0x55deb5366228>

We perform a PCA on 78384 CpG sites covered in all G2 individuals. We first perform a test on the complete dataset.

1.8.1 PCA plot with associated colors for treatments

fviz_pca_ind(PCA_allpos$res.PCA,  label="none", habillage=PCA_allpos$metadata$trtG1G2) +
  scale_color_manual(values = colOffs)+
  scale_shape_manual(values=c(19,19,19,19))

1.8.2 PCA plot with associated colors for brother pair

fviz_pca_ind(PCA_allpos$res.PCA,  label="none", habillage=as.factor(PCA_allpos$metadata$brotherPairID))

# The function dimdesc() can be used to identify the most correlated variables with a given principal component.
mydimdesc <- dimdesc(PCA_allpos$res.PCA, axes = c(1,2), proba = 0.05)

There are 36212 CpG sites most correlated (p < 0.05) with the first principal component , and 15101 with the second principal component.

The 2 first PCA axes do not explain BCI (p<0.05)

1.8.3 How much of the BCI variance is explained by each variables?

# Percentage of variance explained by each factor:
formula(PCA_allpos$modSel) # BCI ~ No.Worms + PAT + (1 | brotherPairID) + (1 | Sex) + No.Worms:PAT
## BCI ~ No.Worms + PAT + (1 | brotherPairID) + (1 | Sex) + No.Worms:PAT
## <environment: 0x55deb5366228>
mod_noworms = lmer(BCI ~ PAT + (1 | brotherPairID) + (1 | Sex), data = PCA_allpos$metadata)
mod_noPAT = lmer(BCI ~ No.Worms + (1 | brotherPairID) + (1 | Sex), data = PCA_allpos$metadata)

# R2c conditional R2 value associated with fixed effects plus the random effects.
A = (MuMIn::r.squaredGLMM(PCA_allpos$modSel)[2] -
       MuMIn::r.squaredGLMM(mod_noworms)[2])*100

B = (MuMIn::r.squaredGLMM(PCA_allpos$modSel)[2] -
       MuMIn::r.squaredGLMM(mod_noPAT)[2])*100
  • 10.75% of the variance in associated with the parasite load (number of worms)
  • 13.38% of the variance in associated with the paternal treatment

1.9 Pretty summary picture

# Set up scatterplot
scatterplot <- ggplot(fullMetadata_OFFS,
                      aes(x = res_Nbr_methCpG_Nbr_coveredCpG,
                          y = BCI, fill=trtG1G2)) +
  geom_point(pch=21, size =3, alpha = .8) +
  guides(color = "none") +
  scale_fill_manual(values = colOffs, name = "Treatment",
                    labels = c("G1 control - G2 control", "G1 control - G2 exposed", "G1 exposed - G2 control", "G1 exposed - G2 exposed")) +
  theme(plot.margin = margin()) + theme_bw() +
  theme(legend.position = "none") +
  xlab("Methylation residuals (methylated sites/coverage")+
  ylab("Body Condition Index")

# Define marginal histogram
marginal_distribution <- function(x, var, group) {
  ggplot(x, aes_string(x = var, fill = group)) +
    # geom_histogram(bins = 30, alpha = 0.4, position = "identity") +
    geom_density(alpha = 0.6, size = 0.2) +
    guides(fill = "none") +
    scale_fill_manual(values = colOffs) +
    theme_void() +
    theme(plot.margin = margin())
}

# Set up marginal histograms
x_hist <- marginal_distribution(fullMetadata_OFFS, "res_Nbr_methCpG_Nbr_coveredCpG", "trtG1G2")
y_hist <- marginal_distribution(fullMetadata_OFFS, "BCI", "trtG1G2") +
  coord_flip()

# Align histograms with scatterplot
aligned_x_hist <- align_plots(x_hist, scatterplot, align = "v")[[1]]
aligned_y_hist <- align_plots(y_hist, scatterplot, align = "h")[[1]]

# Arrange plots
cowplot::plot_grid(
  aligned_x_hist, NULL, scatterplot, aligned_y_hist, ncol = 2, nrow = 2, rel_heights = c(0.2, 1), rel_widths = c(1, 0.2)
)

2 Methylation profile

Methylation profiles, CpG present in all fish

2.1 Dendogram of methylations

Methylation profile for the 55530 CpG sites covered in all G1 & G2 fish (N = 135:

makePrettyMethCluster(uniteCovALL_woSexAndUnknowChr, fullMetadata,
                      my.cols.trt=c("#333333ff","#ff0000ff","#ffe680ff","#ff6600ff","#aaccffff","#aa00d4ff"),
                      my.cols.fam = c(1:4), nbrk = 8)

2.1.1 Offspring:

Methylation profile for the 78384 CpG sites covered in all G2 fish (N = 111:

makePrettyMethCluster(uniteCovALL_G2_woSexAndUnknowChr, fullMetadata_OFFS,
                      my.cols.trt=c("#ffe680ff","#ff6600ff", "#aaccffff", "#aa00d4ff"),
                      my.cols.fam = c(1:4), nbrk = 8)

# Save
pdf(file = "../../dataOut/clusterALLCpG_offspings.pdf", width = 10, height = 4)
makePrettyMethCluster(uniteCovALL_G2_woSexAndUnknowChr, fullMetadata_OFFS,
                      my.cols.trt=c("#ffe680ff","#ff6600ff", "#aaccffff", "#aa00d4ff"),
                      my.cols.fam = c(1:4), nbrk = 8)
dev.off()
## png 
##   2

2.2 Adonis tests: impact of different variables on methylation pattern

Permutational multivariate analysis of variance (PERMANOVA) is a non-parametric multivariate statistical test. It is used to compare groups of objects and test the null hypothesis that the centroids and dispersion of the groups as defined by measure space are equivalent for all groups.

# make distance matrix with B-C distances
data.dist = makeDatadistFUN(uniteCovALL_G2_woSexAndUnknowChr)

## Adonis test: importance of each predictor
adonis2(data.dist ~ PAT * outcome * Sex * brotherPairID, data = fullMetadata_OFFS)
## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Permutation: free
## Number of permutations: 999
## 
## adonis2(formula = data.dist ~ PAT * outcome * Sex * brotherPairID, data = fullMetadata_OFFS)
##                                Df SumOfSqs      R2      F Pr(>F)    
## PAT                             1 0.002782 0.01470 1.8292  0.002 ** 
## outcome                         1 0.001909 0.01009 1.2552  0.022 *  
## Sex                             1 0.002418 0.01277 1.5895  0.001 ***
## brotherPairID                   7 0.028018 0.14805 2.6316  0.001 ***
## PAT:outcome                     1 0.001680 0.00888 1.1045  0.137    
## PAT:Sex                         1 0.001492 0.00789 0.9813  0.503    
## outcome:Sex                     1 0.001557 0.00823 1.0240  0.325    
## PAT:brotherPairID               7 0.014344 0.07580 1.3473  0.001 ***
## outcome:brotherPairID           7 0.010591 0.05596 0.9947  0.518    
## Sex:brotherPairID               7 0.010394 0.05492 0.9763  0.649    
## PAT:outcome:Sex                 1 0.001453 0.00768 0.9555  0.610    
## PAT:outcome:brotherPairID       7 0.010351 0.05470 0.9722  0.728    
## PAT:Sex:brotherPairID           7 0.010249 0.05415 0.9626  0.739    
## outcome:Sex:brotherPairID       6 0.008749 0.04623 0.9587  0.743    
## PAT:outcome:Sex:brotherPairID   1 0.001127 0.00596 0.7413  0.998    
## Residual                       54 0.082132 0.43399                  
## Total                         110 0.189247 1.00000                  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Results: family of the father (brotherPairID) explains more than 14% of the variance in methylation.

To focus on G1 and G2 treatments, we define the permutation structure considering brother pairs (N = 8), and use a PERMANOVA to test the hypothesis that paternal treatment, offspring treatment and their interactions significantly influencing global methylation.

perm <- how(nperm = 1000) # 1000 permutations
setBlocks(perm) <- with(fullMetadata_OFFS, brotherPairID) # define the permutation structure considering brotherPairID and sex
print(adonis2(data.dist ~ PAT * outcome * Sex, data = fullMetadata_OFFS, permutations = perm))
## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Blocks:  with(fullMetadata_OFFS, brotherPairID) 
## Permutation: free
## Number of permutations: 1000
## 
## adonis2(formula = data.dist ~ PAT * outcome * Sex, data = fullMetadata_OFFS, permutations = perm)
##                  Df SumOfSqs      R2      F   Pr(>F)    
## PAT               1 0.002782 0.01470 1.6344 0.000999 ***
## outcome           1 0.001909 0.01009 1.1216 0.034965 *  
## Sex               1 0.002418 0.01277 1.4203 0.005994 ** 
## PAT:outcome       1 0.001716 0.00907 1.0080 0.089910 .  
## PAT:Sex           1 0.001740 0.00920 1.0224 0.207792    
## outcome:Sex       1 0.001703 0.00900 1.0004 0.316683    
## PAT:outcome:Sex   1 0.001653 0.00874 0.9712 0.307692    
## Residual        103 0.175326 0.92644                    
## Total           110 0.189247 1.00000                    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Results:

  • 1.5% of the variation explained by PAT (R2=0.01470, p < 0.001)
  • 1% of the variation explained by outcome (R2=0.01009, p = 0.03)
  • 1.3% of the variation explained by Sex (R2=0.01277, p < 0.01)

2.3 NMDS

#### RUN Goodness of fit
myGOF.NMDS.FUN(dataset = uniteCovALL_G2_woSexAndUnknowChr)

Goodness of fit for NMDS suggested the presence of six dimensions with a stress value > 0.1 and 2 with > 0.2

## to find the seed that allows convergence:
# sapply(3:10, function(x) myNMDS(dataset = uniteCovALL_G2_woSexAndUnknowChr, metadata = fullMetadata_OFFS, myseed = x))
NMDSanalysis <- myNMDSFUN(dataset = uniteCovALL_G2_woSexAndUnknowChr, metadata = fullMetadata_OFFS, myseed = 4)

NMDSanalysis$NMDSplot

# Save
pdf(file = "../../dataOut/NMDSplot_allG2.pdf", width = 9, height = 11)
NMDSanalysis$NMDSplot
dev.off()
## png 
##   2

2.4 The methylation pattern is more affected by direct treatment when the father was infected (Adonis test WITHIN both parental trt)

2.4.1 1. Parents NOT infected

AdonisWithinG1trtFUN(trtgp = c(2,3))
## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Blocks:  with(fullMetadata_OFFS[fullMetadata_OFFS$trtG1G2_NUM %in% trtgp,  
## Permutation: free
## Number of permutations: 1000
## 
## adonis2(formula = data.dist ~ outcome + Sex + brotherPairID, data = fullMetadata_OFFS[fullMetadata_OFFS$trtG1G2_NUM %in% trtgp, ], permutations = perm)
## Model: adonis0(formula = ~outcome + Sex + brotherPairID, data = data, method = method)
##               Df SumOfSqs      R2      F   Pr(>F)    
## outcome        1 0.001475 0.01622 1.0073 0.486513    
## Sex            1 0.002094 0.02302 1.4301 0.000999 ***
## brotherPairID  7 0.020026 0.22018 1.9537 0.001998 ** 
## Residual      46 0.067357 0.74058                    
## Total         55 0.090952 1.00000                    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Results:

  • brother pair explains 22% of the observed variance p < 0.01
  • sex explains 2.3% of the observed variance p < 0.001
  • Offspring trt explains 1.6% of the observed variance (non significant)
NMDSanalysis_G1control <- myNMDSFUN(dataset = uniteCovALL_G2_woSexAndUnknowChr,
                                    metadata = fullMetadata_OFFS, myseed = 25,
                                    byParentTrt=TRUE,
                                    trtgp = c(2,3))

#png(filename = "../../dataOut/NMDSplot_G1fromControlG2.png", width = 900, height = 900)
NMDSanalysis_G1control$NMDSplot

#dev.off()

2.4.2 2. Parents infected

AdonisWithinG1trtFUN(trtgp = c(5,6))
## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Blocks:  with(fullMetadata_OFFS[fullMetadata_OFFS$trtG1G2_NUM %in% trtgp,  
## Permutation: free
## Number of permutations: 1000
## 
## adonis2(formula = data.dist ~ outcome + Sex + brotherPairID, data = fullMetadata_OFFS[fullMetadata_OFFS$trtG1G2_NUM %in% trtgp, ], permutations = perm)
## Model: adonis0(formula = ~outcome + Sex + brotherPairID, data = data, method = method)
##               Df SumOfSqs      R2      F   Pr(>F)   
## outcome        1 0.002160 0.02262 1.4047 0.006993 **
## Sex            1 0.002053 0.02150 1.3349 0.192807   
## brotherPairID  7 0.022076 0.23117 2.0507 0.019980 * 
## Residual      45 0.069205 0.72470                   
## Total         54 0.095494 1.00000                   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Results:

  • brother pair explains 22% of the observed variance p = 0.01
  • sex explains 2.1% of the observed variance (non significant)
  • Offspring trt explains 2.3% of the observed variance p < 0.01
NMDSanalysis_G1infected <- myNMDSFUN(dataset = uniteCovALL_G2_woSexAndUnknowChr,
                                     metadata = fullMetadata_OFFS, myseed = 25,
                                     byParentTrt=TRUE,
                                     trtgp = c(5,6))

#png(filename = "../../dataOut/NMDSplot_G1fromInfectedG2.png", width = 900, height = 900)
NMDSanalysis_G1infected$NMDSplot

#dev.off()

3 Differential methylations “Bottom up”: DM in offspring

3.1 Description of DMS in the four different comparisons

Differential methylation by brother pair (sex as covariate):

  1. CC-TC = CONTROL fish (parent CvsT)
  2. CT-TT = TREATMENT fish (parent CvsT)
  3. CC-CT = fish from CONTROL parents (G2 CvsT)
  4. TC-TT = fish from TREATMENT parents (G2 CvsT)

3.1.1 Attribute plot of the DMS in every BP for the 4 comparisons

## All DMS are stored in DMSBPlist by brother pair

## Vector of all 4 comparisons
vecCompa <- c("CC_TC", "CT_TT", "CC_CT", "TC_TT")
vecCompaVerbose <- c("Control offspring in control vs infected parents", "Infected offspring in control vs infected parents", "Control vs infected offspring from control parent", "Control vs infected offspring from infected parent") # this is useful in plots

## Extract DMS for all 4 comparisons (by position)
myPosList = lapply(DMSBPlist, lapply, function(x){paste(x$chr, x$end)})

## Subselect those DMS present in at least 4 out of 8 BP
get2keep = function(Compa, NBP = 4){
  x <- lapply(myPosList, function(x){unlist(x[[paste0("DMS_15pc_BP_", Compa)]])})
  f <- table(unlist((x))) # each DMS present between 1 and 8 times
  tokeep <- names(f)[f >= NBP]
  # print(length(tokeep))
  ## Keep the DMS present in 4 families minimum
  DMSBPlist_INTER4 <- lapply(x, function(x){x[x %in% tokeep]})
  ## Reorder by family:
  DMSBPlist_INTER4 <- DMSBPlist_INTER4[names(DMSBPlist_INTER4)[order(names(DMSBPlist_INTER4))]]
  return(DMSBPlist_INTER4)
}

## Prepare df for complexUpset
getUpsetDF = function(i, NBP){ # for a given comparison
  A = get2keep(vecCompa[i], NBP)
  A2 = lapply(A, function(x){
    x = data.frame(x)    # vector of DMS as df
    names(x) = "DMS"    # name each CpG
    return(x)
  })
  ## Add BP name
  for (i in 1:length(names(A2))){
    A2[[i]]["BP"] = names(A2)[i]
  }
  # make a dataframe
  A2 = A2 %>% reduce(full_join, by = "DMS")
  # names column with BP id
  for (i in 2:ncol(A2)) {names(A2)[i] = unique(A2[!is.na(A2[i]), i])}
  # replace by 0 or 1 the DMS absence/presence
  A = data.frame(apply(A2[2:9], 2, function(x) ifelse(is.na(x), 0, 1)))
  # add DMS
  A$DMS = A2$DMS
  return(A)
}

Make upset plots for DMS found in at least 6 BP:

for (i in 1:4){
  df = getUpsetDF(i, NBP = 6)
  print(ComplexUpset::upset(
    df,
    names(df)[1:8],
    width_ratio=0.1,
    themes=upset_default_themes(text=element_text(size=15)),
    sort_intersections_by=c('degree', 'cardinality'),
    queries=query_by_degree(
      df,  names(df)[1:8],
      params_by_degree=list(
        '1'=list(color='red', fill='red'),
        '2'=list(color='purple', fill='purple'),
        '3'=list(color='blue', fill='blue'),
        '4'=list(color='grey', fill='grey'),
        '5'=list(color='red', fill='red'),
        '6'=list(color='purple', fill='purple'),
        '7'=list(color='blue', fill='blue'),
        '8'=list(color='green', fill='green')
      ),
      only_components=c("intersections_matrix", "Intersection size")
    )) + ggtitle(paste0("Differentially methylated sites found in more than six brother pairs in the comparison: \n", vecCompaVerbose[i]))) #+ theme(plot.title = element_text(size = 30)))
}

3.2 Statistical setup

PARENTAL effect: DMS found in either CC-TC or CT-TT comparisons OFFSPRING effect: DMS found in either CC-CT or TC-TT comparisons INTERACTION effects: DMS found in CC-CT which show a differential methylation (not necessarily significant) in the opposite direction in TC-TT, or inversely

3.2.1 Identify the different DMS groups

## Creates a data frame with BP, DMS, effect and comparison, when the DMS is found in 4 BP or more
get2keepFULLdfBP = function(Compa, NBP = 4, myeffect = "Paternal"){
  x <- lapply(myPosList, function(x){unlist(x[[paste0("DMS_15pc_BP_", Compa)]])})
  mylist = list() # empty list
  for (i in 1:length(names(x))){
    a=data.frame(BP=names(x)[i], DMS=x[[names(x)[i]]])
    mylist[[i]] = a
    names(mylist)[i] = names(x)[i]
  } # fill full list with df containing BP and DMS
  mydf = Reduce(function(...) merge(..., all=T), mylist) # concatenate in a df
  # Add information
  mydf$globalEffect = myeffect
  mydf$comparison = Compa
  # Keep the DMS present in 4 families minimum:
  tokeep = names(table(mydf$DMS)[table(mydf$DMS)>=NBP])
  mydf = mydf[mydf$DMS %in% tokeep,]
  return(mydf)
}

# PARENTAL effect: DMS found in either CC-TC or CT-TT comparisons
df_PaternalEffect_4BPmin = unique(rbind(get2keepFULLdfBP(Compa = vecCompa[1], NBP = 4, myeffect = "G1all"),
                 get2keepFULLdfBP(Compa = vecCompa[2], NBP = 4, myeffect = "G1all")))
DMS_PaternalEffect_4BPmin = unique(df_PaternalEffect_4BPmin$DMS)

# OFFSPRING effect: DMS found in either CC-CT or TC-TT comparisons
df_OffspringEffect_4BPmin = unique(rbind(get2keepFULLdfBP(Compa = vecCompa[3], NBP = 4, myeffect = "G2all"),
                 get2keepFULLdfBP(Compa = vecCompa[4], NBP = 4, myeffect = "G2all")))
DMS_OffspringEffect_4BPmin = unique(df_OffspringEffect_4BPmin$DMS)

##########
## OVERLAP=DMS found in G1 & G2
# case 1 -> INTERACTION effects DMS found in CC-CT which show a differential methylation in the opposite direction in TC-TT, or inversely (reaction norm are inversed) in most of the brother pairs (5 or more)
# case 2 -> ADDITIVE effect: no slope inversion (4 or more)

# Get the raw methylation from the DMS which are in both Paternal and Offspring effects
subUnite = methylKit::select(uniteCov14_G2_woSexAndUnknowChrOVERLAP,
                             which(paste(uniteCov14_G2_woSexAndUnknowChrOVERLAP$chr, uniteCov14_G2_woSexAndUnknowChrOVERLAP$end) %in%
                                     intersect(DMS_OffspringEffect_4BPmin, DMS_PaternalEffect_4BPmin)))

# Get mean methylation per brother pair, per treatment:
getMeanMeth <- function(subUnite, BP, mytrt){
  metadata = fullMetadata_OFFS[fullMetadata_OFFS$brotherPairID %in% BP & fullMetadata_OFFS$trtG1G2 %in% mytrt, ]
  myuniteCov = reorganize(methylObj = subUnite, treatment = metadata$trtG1G2_NUM, sample.ids = metadata$ID)
  ## remove bases where NO fish in this BP has a coverage
  myuniteCov = methylKit::select(myuniteCov, which(!is.na(rowSums(percMethylation(myuniteCov)))))
  # calculate mean methylation
  df = data.frame(DMS = paste(myuniteCov$chr, myuniteCov$end), meanMeth = rowMeans(percMethylation(myuniteCov)), trt = mytrt, BP = BP)
  return(df)
}

# We will apply the following function to all BP and all trt:
vecBP <- unique(fullMetadata_OFFS$brotherPairID)
vectrt <- unique(fullMetadata_OFFS$trtG1G2)

## Loop over all BP & trt
df = data.frame(DMS=NULL, meanMeth=NULL, trt=NULL, BP=NULL) # empty df
for (i in 1:length(vecBP)){
  for (j in 1:length(vectrt)){
    subdf = getMeanMeth(subUnite = subUnite, BP = vecBP[[i]], mytrt = vectrt[[j]])
    df = rbind(df, subdf)
  }
}

## Add G1 & G2 trt
df$G1trt = ifelse(df$trt %in% c("NE_control", "NE_exposed"), "control", "infected")
df$G2trt = ifelse(df$trt %in% c("NE_control", "E_control"), "control", "infected")

## cut by G1 trt & merge
dfcp = df[df$G1trt %in% "control"  ,]
dfcpco = dfcp[dfcp$G2trt %in% "control",]; dfcpio = dfcp[dfcp$G2trt %in% "infected",]
dfcp = merge(dfcpco, dfcpio, by = c("DMS", "BP")) %>%
  mutate(meanDiffMeth=meanMeth.y - meanMeth.x) %>% dplyr::select(c("DMS", "BP", "meanDiffMeth"))
dfip = df[df$G1trt %in% "infected",]
dfipco = dfip[dfip$G2trt %in% "control",]; dfipio = dfip[dfip$G2trt %in% "infected",]
dfip = merge(dfipco, dfipio, by = c("DMS", "BP")) %>%
  mutate(meanDiffMeth=meanMeth.y - meanMeth.x) %>% dplyr::select(c("DMS", "BP", "meanDiffMeth"))
df2=merge(dfcp,dfip, by=c("DMS", "BP"))

## interaction if slope inversion/additive if not
df2$inversionSlopeReactionNorms = FALSE
df2[!sign(df2$meanDiffMeth.x) == sign(df2$meanDiffMeth.y),"inversionSlopeReactionNorms"] = TRUE
names(df2)[names(df2) %in% "meanDiffMeth.x"] = "meanDiffMeth.controlG1"
names(df2)[names(df2) %in% "meanDiffMeth.y"] = "meanDiffMeth.infectedG1"

####################
# Get a vector of DMS for each category:
## A DMS is "interaction" if there are more often slope inversion than not
DMS_G1G2interactionEffect_4BPmin = df2 %>% group_by(DMS) %>% dplyr::summarise(count=n(), inversionSlopeRate=sum(inversionSlopeReactionNorms)/count, 
                                                                              effect=ifelse(inversionSlopeRate>0.5, "interaction", "additive")) %>%
  dplyr::filter(effect=="interaction") %>% dplyr::select(DMS) %>% unlist()
DMS_G1G2additiveEffect_4BPmin = df2 %>% group_by(DMS) %>% dplyr::summarise(count=n(), inversionSlopeRate=sum(inversionSlopeReactionNorms)/count, 
                                                                              effect=ifelse(inversionSlopeRate>0.5, "interaction", "additive")) %>%
  dplyr::filter(effect=="additive") %>% dplyr::select(DMS) %>% unlist()
DMS_G1onlyEffect_4BPmin = DMS_PaternalEffect_4BPmin[!DMS_PaternalEffect_4BPmin %in% c(DMS_G1G2interactionEffect_4BPmin, DMS_G1G2additiveEffect_4BPmin)]
DMS_G2onlyEffect_4BPmin = DMS_OffspringEffect_4BPmin[!DMS_OffspringEffect_4BPmin %in% c(DMS_G1G2interactionEffect_4BPmin, DMS_G1G2additiveEffect_4BPmin)]

####################
# Make a BIG df with all DMS, effects and BP (this time, the effects are DMS-BP specific, not global)
df_effects_full = merge(unique(df_PaternalEffect_4BPmin[c("BP", "DMS","globalEffect")]),
      unique(df_OffspringEffect_4BPmin[c("BP", "DMS","globalEffect")]), by=c("BP", "DMS"), all=T)

df_effects_full = merge(df_effects_full, df2, all=T)
df_effects_full$effectBPlevel[df_effects_full$globalEffect.x == "G1all"] = "G1"
df_effects_full$effectBPlevel[df_effects_full$globalEffect.y == "G2all"] = "G2"
df_effects_full$effectBPlevel[df_effects_full$inversionSlopeReactionNorms == TRUE & 
                         df_effects_full$globalEffect.x == "G1all" & df_effects_full$globalEffect.y == "G2all"] = "G1:G2"
df_effects_full$effectBPlevel[df_effects_full$inversionSlopeReactionNorms == FALSE & 
                         df_effects_full$globalEffect.x == "G1all" & df_effects_full$globalEffect.y == "G2all"]= "G1+G2"

#rm junk
rm(subUnite, df, dfcp, dfcpco, dfcpio, dfip, dfipco, dfipio, df2)
# Plot a Venn diagram
ggVennDiagram(list("Paternal effect" = DMS_PaternalEffect_4BPmin, "Offspring effect" = DMS_OffspringEffect_4BPmin, "InteractionEffects" = DMS_G1G2interactionEffect_4BPmin),
              label_alpha = 0) + scale_color_manual(values = c(1,1,1))+
  scale_fill_gradient(low="white",high = "yellow") + theme(legend.position = "none")

# Save:
pdf(file = "../../dataOut/DMS3groupsVenn.pdf", width = 7, height = 6)
ggVennDiagram(list("Paternal effect" = DMS_PaternalEffect_4BPmin, "Offspring effect" = DMS_OffspringEffect_4BPmin, "InteractionEffects" = DMS_G1G2interactionEffect_4BPmin),
              label_alpha = 0) + scale_color_manual(values = c(1,1,1))+
  scale_fill_gradient(low="white",high = "yellow") + theme(legend.position = "none")
dev.off()
## png 
##   2

The size of the different sections of the Venn diagram are as follows:

  • length(DMS_G1onlyEffect_4BPmin): 1639
  • length(DMS_G2onlyEffect_4BPmin): 309
  • length(DMS_G1G2additiveEffect_4BPmin): 173
  • length(DMS_G1G2interactionEffect_4BPmin): 151

3.2.2 Annotate the different DMS groups

DMS_G1onlyEffect_4BPmin_ANNOT = myHomebrewDMSannotation(DMSvec = DMS_G1onlyEffect_4BPmin,
                                                        myannotBed12 = annotBed12, myannotGff3 = annotGff3)
## [1] "check that these features are identical:"
## [1] "gasAcul16628-RA" "gasAcul16627-RA"
## [1] "check that these features are identical:"
## [1] "gasAcul15294-RA" "gasAcul15295-RA"
## [1] "check that these features are identical:"
## [1] "gasAcul19985-RA" "gasAcul19984-RA"
DMS_G1onlyEffect_4BPmin_ANNOT=DMS_G1onlyEffect_4BPmin_ANNOT %>% mutate(effect = "G1")
# "check that these features are identical:"
# "gasAcul16628-RA" "gasAcul16627-RA" -> overlapping: Protein of unknown function & Tp63
# "gasAcul15294-RA" "gasAcul15295-RA" -> overlapping: Cdh23 & Vsir (immune!)
# "gasAcul19985-RA" "gasAcul19984-RA" -> overlapping: ST3GAL1 & ST3GAL1, all good for this one
DMS_G2onlyEffect_4BPmin_ANNOT = myHomebrewDMSannotation(DMSvec = DMS_G2onlyEffect_4BPmin,
                                                        myannotBed12 = annotBed12, myannotGff3 = annotGff3)
DMS_G2onlyEffect_4BPmin_ANNOT=DMS_G2onlyEffect_4BPmin_ANNOT %>% mutate(effect = "G2")

DMS_G1G2additiveEffect_4BPmin_ANNOT = myHomebrewDMSannotation(DMSvec = DMS_G1G2additiveEffect_4BPmin,
                                                              myannotBed12 = annotBed12, myannotGff3 = annotGff3)
DMS_G1G2additiveEffect_4BPmin_ANNOT=DMS_G1G2additiveEffect_4BPmin_ANNOT %>% mutate(effect = "G1+G2")

DMS_G1G2interactionEffect_4BPmin_ANNOT = myHomebrewDMSannotation(DMSvec = DMS_G1G2interactionEffect_4BPmin,
                                                                 myannotBed12 = annotBed12, myannotGff3 = annotGff3)
## [1] "check that these features are identical:"
## [1] "gasAcul04256-RA" "gasAcul04255-RA"
## [1] "check that these features are identical:"
## [1] "gasAcul04256-RA" "gasAcul04255-RA"
DMS_G1G2interactionEffect_4BPmin_ANNOT=DMS_G1G2interactionEffect_4BPmin_ANNOT%>% mutate(effect = "G1:G2")
# "check that these features are identical:"
# "gasAcul04256-RA" "gasAcul04255-RA" -> overlapping: Protein if unknown function & Proteolipid protein DM beta

# Plot a Venn diagram to see genes in common
pdf(file = "../../dataOut/DMSgroupsVenn_geneLevel.pdf", width = 7, height = 6)
ggVennDiagram(list("G1" = DMS_G1onlyEffect_4BPmin_ANNOT$feature.name, "G2" = DMS_G2onlyEffect_4BPmin_ANNOT$feature.name,
                   "G1+G2" = DMS_G1G2additiveEffect_4BPmin_ANNOT$feature.name, "G1:G2" = DMS_G1G2interactionEffect_4BPmin_ANNOT$feature.name),
              label_alpha = 0) + scale_color_manual(values = c(1,1,1,1))+
  scale_fill_gradient(low="white",high = "yellow") + theme(legend.position = "none") + ggtitle("Genes in each effect")
dev.off()
## png 
##   2
## NB some genes have DMS in different effects!
# A gene has DMSs in the 4 effects!
geneAll4 = intersect(intersect(intersect(DMS_G1onlyEffect_4BPmin_ANNOT$feature.name, DMS_G2onlyEffect_4BPmin_ANNOT$feature.name),
                               DMS_G1G2additiveEffect_4BPmin_ANNOT$feature.name), DMS_G1G2interactionEffect_4BPmin_ANNOT$feature.name)
DMS_G1onlyEffect_4BPmin_ANNOT[DMS_G1onlyEffect_4BPmin_ANNOT$feature.name %in% geneAll4,]
##     GeneSymbol    feature.name       chrom    start      end width strand.x
## 1        FKBP3 gasAcul22163-RA Gy_chrXVIII 14054992 14054992     1        *
## 2        FKBP3 gasAcul22163-RA Gy_chrXVIII 14054930 14054930     1        *
## 3        FKBP3 gasAcul22163-RA Gy_chrXVIII 14053519 14053519     1        *
## 4        FKBP3 gasAcul22163-RA Gy_chrXVIII 14054980 14054980     1        *
## 5        FKBP3 gasAcul22163-RA Gy_chrXVIII 14054965 14054965     1        *
## 6        FKBP3 gasAcul22163-RA Gy_chrXVIII 14054983 14054983     1        *
## 7        FKBP3 gasAcul22163-RA Gy_chrXVIII 14054932 14054932     1        *
## 8        FKBP3 gasAcul22163-RA Gy_chrXVIII 14053497 14053497     1        *
## 631       DPP6 gasAcul22719-RA   Gy_chrXXI 13299709 13299709     1        *
## 632       DPP6 gasAcul22719-RA   Gy_chrXXI 13299666 13299666     1        *
## 633       DPP6 gasAcul22719-RA   Gy_chrXXI 13299696 13299696     1        *
##                      DMS featureType
## 1   Gy_chrXVIII 14054992  intergenic
## 2   Gy_chrXVIII 14054930  intergenic
## 3   Gy_chrXVIII 14053519  intergenic
## 4   Gy_chrXVIII 14054980  intergenic
## 5   Gy_chrXVIII 14054965  intergenic
## 6   Gy_chrXVIII 14054983  intergenic
## 7   Gy_chrXVIII 14054932  intergenic
## 8   Gy_chrXVIII 14053497  intergenic
## 631   Gy_chrXXI 13299709      intron
## 632   Gy_chrXXI 13299666      intron
## 633   Gy_chrXXI 13299696      intron
##                                                                                       Note
## 1         Similar to FKBP3: Peptidyl-prolyl cis-trans isomerase FKBP3 (Bos taurus OX=9913)
## 2         Similar to FKBP3: Peptidyl-prolyl cis-trans isomerase FKBP3 (Bos taurus OX=9913)
## 3         Similar to FKBP3: Peptidyl-prolyl cis-trans isomerase FKBP3 (Bos taurus OX=9913)
## 4         Similar to FKBP3: Peptidyl-prolyl cis-trans isomerase FKBP3 (Bos taurus OX=9913)
## 5         Similar to FKBP3: Peptidyl-prolyl cis-trans isomerase FKBP3 (Bos taurus OX=9913)
## 6         Similar to FKBP3: Peptidyl-prolyl cis-trans isomerase FKBP3 (Bos taurus OX=9913)
## 7         Similar to FKBP3: Peptidyl-prolyl cis-trans isomerase FKBP3 (Bos taurus OX=9913)
## 8         Similar to FKBP3: Peptidyl-prolyl cis-trans isomerase FKBP3 (Bos taurus OX=9913)
## 631 Similar to Dpp6: Dipeptidyl aminopeptidase-like protein 6 (Rattus norvegicus OX=10116)
## 632 Similar to Dpp6: Dipeptidyl aminopeptidase-like protein 6 (Rattus norvegicus OX=10116)
## 633 Similar to Dpp6: Dipeptidyl aminopeptidase-like protein 6 (Rattus norvegicus OX=10116)
##              Ontology_term start.gene end.gene strand.y       Parent
## 1               GO:0003755   14048181 14049908        - gasAcul22163
## 2               GO:0003755   14048181 14049908        - gasAcul22163
## 3               GO:0003755   14048181 14049908        - gasAcul22163
## 4               GO:0003755   14048181 14049908        - gasAcul22163
## 5               GO:0003755   14048181 14049908        - gasAcul22163
## 6               GO:0003755   14048181 14049908        - gasAcul22163
## 7               GO:0003755   14048181 14049908        - gasAcul22163
## 8               GO:0003755   14048181 14049908        - gasAcul22163
## 631 GO:0006508, GO:0008236   13289971 13323117        + gasAcul22719
## 632 GO:0006508, GO:0008236   13289971 13323117        + gasAcul22719
## 633 GO:0006508, GO:0008236   13289971 13323117        + gasAcul22719
##     nDMSperGene geneLengthkb nDMSperGenekb ENTREZID                 description
## 1             8        1.727          4.63     2287     FKBP prolyl isomerase 3
## 2             8        1.727          4.63     2287     FKBP prolyl isomerase 3
## 3             8        1.727          4.63     2287     FKBP prolyl isomerase 3
## 4             8        1.727          4.63     2287     FKBP prolyl isomerase 3
## 5             8        1.727          4.63     2287     FKBP prolyl isomerase 3
## 6             8        1.727          4.63     2287     FKBP prolyl isomerase 3
## 7             8        1.727          4.63     2287     FKBP prolyl isomerase 3
## 8             8        1.727          4.63     2287     FKBP prolyl isomerase 3
## 631           3       33.146          0.09     1804 dipeptidyl peptidase like 6
## 632           3       33.146          0.09     1804 dipeptidyl peptidase like 6
## 633           3       33.146          0.09     1804 dipeptidyl peptidase like 6
##                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                         summary
## 1                                                                                              The protein encoded by this gene is a member of the immunophilin protein family, which play a role in immunoregulation and basic cellular processes involving protein folding and trafficking. This encoded protein is a cis-trans prolyl isomerase that binds the immunosuppressants FK506 and rapamycin, as well as histone deacetylases, the transcription factor YY1, casein kinase II, and nucleolin. It has a higher affinity for rapamycin than for FK506 and thus may be an important target molecule for immunosuppression by rapamycin. [provided by RefSeq, Sep 2008]
## 2                                                                                              The protein encoded by this gene is a member of the immunophilin protein family, which play a role in immunoregulation and basic cellular processes involving protein folding and trafficking. This encoded protein is a cis-trans prolyl isomerase that binds the immunosuppressants FK506 and rapamycin, as well as histone deacetylases, the transcription factor YY1, casein kinase II, and nucleolin. It has a higher affinity for rapamycin than for FK506 and thus may be an important target molecule for immunosuppression by rapamycin. [provided by RefSeq, Sep 2008]
## 3                                                                                              The protein encoded by this gene is a member of the immunophilin protein family, which play a role in immunoregulation and basic cellular processes involving protein folding and trafficking. This encoded protein is a cis-trans prolyl isomerase that binds the immunosuppressants FK506 and rapamycin, as well as histone deacetylases, the transcription factor YY1, casein kinase II, and nucleolin. It has a higher affinity for rapamycin than for FK506 and thus may be an important target molecule for immunosuppression by rapamycin. [provided by RefSeq, Sep 2008]
## 4                                                                                              The protein encoded by this gene is a member of the immunophilin protein family, which play a role in immunoregulation and basic cellular processes involving protein folding and trafficking. This encoded protein is a cis-trans prolyl isomerase that binds the immunosuppressants FK506 and rapamycin, as well as histone deacetylases, the transcription factor YY1, casein kinase II, and nucleolin. It has a higher affinity for rapamycin than for FK506 and thus may be an important target molecule for immunosuppression by rapamycin. [provided by RefSeq, Sep 2008]
## 5                                                                                              The protein encoded by this gene is a member of the immunophilin protein family, which play a role in immunoregulation and basic cellular processes involving protein folding and trafficking. This encoded protein is a cis-trans prolyl isomerase that binds the immunosuppressants FK506 and rapamycin, as well as histone deacetylases, the transcription factor YY1, casein kinase II, and nucleolin. It has a higher affinity for rapamycin than for FK506 and thus may be an important target molecule for immunosuppression by rapamycin. [provided by RefSeq, Sep 2008]
## 6                                                                                              The protein encoded by this gene is a member of the immunophilin protein family, which play a role in immunoregulation and basic cellular processes involving protein folding and trafficking. This encoded protein is a cis-trans prolyl isomerase that binds the immunosuppressants FK506 and rapamycin, as well as histone deacetylases, the transcription factor YY1, casein kinase II, and nucleolin. It has a higher affinity for rapamycin than for FK506 and thus may be an important target molecule for immunosuppression by rapamycin. [provided by RefSeq, Sep 2008]
## 7                                                                                              The protein encoded by this gene is a member of the immunophilin protein family, which play a role in immunoregulation and basic cellular processes involving protein folding and trafficking. This encoded protein is a cis-trans prolyl isomerase that binds the immunosuppressants FK506 and rapamycin, as well as histone deacetylases, the transcription factor YY1, casein kinase II, and nucleolin. It has a higher affinity for rapamycin than for FK506 and thus may be an important target molecule for immunosuppression by rapamycin. [provided by RefSeq, Sep 2008]
## 8                                                                                              The protein encoded by this gene is a member of the immunophilin protein family, which play a role in immunoregulation and basic cellular processes involving protein folding and trafficking. This encoded protein is a cis-trans prolyl isomerase that binds the immunosuppressants FK506 and rapamycin, as well as histone deacetylases, the transcription factor YY1, casein kinase II, and nucleolin. It has a higher affinity for rapamycin than for FK506 and thus may be an important target molecule for immunosuppression by rapamycin. [provided by RefSeq, Sep 2008]
## 631 This gene encodes a single-pass type II membrane protein that is a member of the peptidase S9B family of serine proteases. This protein has no detectable protease activity, most likely due to the absence of the conserved serine residue normally present in the catalytic domain of serine proteases. However, it does bind specific voltage-gated potassium channels and alters their expression and biophysical properties. Variations in this gene may be associated with susceptibility to amyotrophic lateral sclerosis and with idiopathic ventricular fibrillation. Alternative splicing results in multiple transcript variants. [provided by RefSeq, Mar 2014]
## 632 This gene encodes a single-pass type II membrane protein that is a member of the peptidase S9B family of serine proteases. This protein has no detectable protease activity, most likely due to the absence of the conserved serine residue normally present in the catalytic domain of serine proteases. However, it does bind specific voltage-gated potassium channels and alters their expression and biophysical properties. Variations in this gene may be associated with susceptibility to amyotrophic lateral sclerosis and with idiopathic ventricular fibrillation. Alternative splicing results in multiple transcript variants. [provided by RefSeq, Mar 2014]
## 633 This gene encodes a single-pass type II membrane protein that is a member of the peptidase S9B family of serine proteases. This protein has no detectable protease activity, most likely due to the absence of the conserved serine residue normally present in the catalytic domain of serine proteases. However, it does bind specific voltage-gated potassium channels and alters their expression and biophysical properties. Variations in this gene may be associated with susceptibility to amyotrophic lateral sclerosis and with idiopathic ventricular fibrillation. Alternative splicing results in multiple transcript variants. [provided by RefSeq, Mar 2014]
##     effect
## 1       G1
## 2       G1
## 3       G1
## 4       G1
## 5       G1
## 6       G1
## 7       G1
## 8       G1
## 631     G1
## 632     G1
## 633     G1
## FKBP3 & DPP6

3.2.2.1 Focus on gene FKBP3

plotGeneTarget <- function(myTargetGene, myannotBed12=annotBed12){
  # plotdf
  dfplot = rbind(DMS_G1onlyEffect_4BPmin_ANNOT[DMS_G1onlyEffect_4BPmin_ANNOT$GeneSymbol %in% myTargetGene,],
                 DMS_G2onlyEffect_4BPmin_ANNOT[DMS_G2onlyEffect_4BPmin_ANNOT$GeneSymbol %in% myTargetGene,],
                 DMS_G1G2additiveEffect_4BPmin_ANNOT[DMS_G1G2additiveEffect_4BPmin_ANNOT$GeneSymbol %in% myTargetGene,],
                 DMS_G1G2interactionEffect_4BPmin_ANNOT[DMS_G1G2interactionEffect_4BPmin_ANNOT$GeneSymbol %in% myTargetGene,])
  # Find TSS position of the gene
  dfplot$TSSpos = myannotBed12$TSSes[myannotBed12$TSSes$name %in% dfplot$feature.name]@ranges@start
  # Set TSS as origin
  dfplot$start_distToTSS = dfplot$start - dfplot$TSSpos 
  dfplot$end_distToTSS = dfplot$end - dfplot$TSSpos 
  dfplot$start.gene_distToTSS = dfplot$start.gene - dfplot$TSSpos 
  dfplot$end.gene_distToTSS = dfplot$end.gene - dfplot$TSSpos 
  # Reorder effects factor for legend
  dfplot$effect <- factor(dfplot$effect, levels = c("G1", "G2", "G1+G2", "G1:G2"))
   # Prepare rectangles
  mini=min(dfplot$start.gene_distToTSS, dfplot$start_distToTSS)
  maxi=max(dfplot$end.gene_distToTSS, dfplot$end_distToTSS)+100
  start.gene_distToTSS = unique(dfplot$start.gene_distToTSS)
  end.gene_distToTSS = unique(dfplot$end.gene_distToTSS)
  # Plot
  plotGeneTarget= ggplot(dfplot) +
    geom_rect(xmin=mini, xmax=maxi, ymin=0, ymax =.5, fill="#bfb6b6")+
    geom_rect(aes(xmin=start.gene_distToTSS, xmax=end.gene_distToTSS, ymin=0, ymax =.5), fill = "black")+
    geom_point(aes(x = start_distToTSS, y = .8, col = effect, pch=featureType), size = 5) +
    geom_segment(aes(x = start_distToTSS, xend = start_distToTSS, y=0, yend=.8, col = effect)) +
    geom_segment(aes(x = 0, xend = 0, y=0, yend=.5), col = "red", size = 3) + # TSS
    theme_blank() +
    theme(axis.title.x = element_blank(), axis.title.y = element_blank(), axis.text.y = element_blank(), axis.ticks.y = element_blank())+
    labs(title = paste(unique(dfplot$GeneSymbol), ":", dfplot$description),
         subtitle = str_wrap(dfplot$summary, width = 150))+
  scale_color_manual(values = c("#E69F00", "#56B4E9", "#009E73", "#CC79A7"))#cb friendly palette
  ####### And by brother pairs
  dfplot_BP = merge(df_effects_full[df_effects_full$DMS %in% dfplot$DMS,c("BP", "DMS", "effectBPlevel")], dfplot)
  # Rm potision with no effect in this BP
  dfplot_BP=dfplot_BP[!is.na(dfplot_BP$effectBPlevel),]
  # Same order of Father's family as in figure 1 (clusters)
  dfplot_BP$BP = factor(dfplot_BP$BP, levels = c("BP05", "BP31", "BP04", "BP30", "BP16", "BP34", "BP49","BP46"))
  # Plot
  plotGeneTargetBP = ggplot(dfplot_BP) +
    geom_rect(xmin=mini, xmax=maxi, ymin=0, ymax =.5, fill="#bfb6b6")+
    geom_rect(aes(xmin=start.gene_distToTSS, xmax=end.gene_distToTSS, ymin=0, ymax =.5), fill = "black")+
    geom_point(aes(x = start_distToTSS, y = .8, col = effectBPlevel, pch=featureType), size = 5) +
    geom_segment(aes(x = start_distToTSS, xend = start_distToTSS, y=0, yend=.8, col = effectBPlevel)) +
    geom_segment(aes(x = 0, xend = 0, y=0, yend=.5), col = "red", size = 3) + # TSS
    theme_blank() +
    facet_grid(BP~.)+ theme(panel.spacing = unit(1.5, "lines"))+
    theme(axis.title.x = element_blank(), axis.title.y = element_blank(), axis.text.y = element_blank(), axis.ticks.y = element_blank())+
    scale_y_continuous(expand=expansion(mult=c(0,0.15))) # increase space up
    scale_color_manual(values = c("#E69F00", "#56B4E9", "#009E73", "#CC79A7"))#cb friendly palette
  
  return(list(myTargetGene_DMSdf=dfplot, plotGeneTarget=plotGeneTarget, plotGeneTargetBP=plotGeneTargetBP))
}

## All 4 effects:
P = plotGeneTarget(myTargetGene = "FKBP3")

P$myTargetGene_DMSdf
##    GeneSymbol    feature.name       chrom    start      end width strand.x
## 1       FKBP3 gasAcul22163-RA Gy_chrXVIII 14054992 14054992     1        *
## 2       FKBP3 gasAcul22163-RA Gy_chrXVIII 14054930 14054930     1        *
## 3       FKBP3 gasAcul22163-RA Gy_chrXVIII 14053519 14053519     1        *
## 4       FKBP3 gasAcul22163-RA Gy_chrXVIII 14054980 14054980     1        *
## 5       FKBP3 gasAcul22163-RA Gy_chrXVIII 14054965 14054965     1        *
## 6       FKBP3 gasAcul22163-RA Gy_chrXVIII 14054983 14054983     1        *
## 7       FKBP3 gasAcul22163-RA Gy_chrXVIII 14054932 14054932     1        *
## 8       FKBP3 gasAcul22163-RA Gy_chrXVIII 14053497 14053497     1        *
## 9       FKBP3 gasAcul22163-RA Gy_chrXVIII 14053503 14053503     1        *
## 10      FKBP3 gasAcul22163-RA Gy_chrXVIII 14054800 14054800     1        *
## 11      FKBP3 gasAcul22163-RA Gy_chrXVIII 14054854 14054854     1        *
## 12      FKBP3 gasAcul22163-RA Gy_chrXVIII 14054831 14054831     1        *
## 13      FKBP3 gasAcul22163-RA Gy_chrXVIII 14053506 14053506     1        *
## 21      FKBP3 gasAcul22163-RA Gy_chrXVIII 14053508 14053508     1        *
## 31      FKBP3 gasAcul22163-RA Gy_chrXVIII 14053521 14053521     1        *
## 41      FKBP3 gasAcul22163-RA Gy_chrXVIII 14053542 14053542     1        *
## 51      FKBP3 gasAcul22163-RA Gy_chrXVIII 14054809 14054809     1        *
## 61      FKBP3 gasAcul22163-RA Gy_chrXVIII 14054878 14054878     1        *
## 71      FKBP3 gasAcul22163-RA Gy_chrXVIII 14054885 14054885     1        *
## 81      FKBP3 gasAcul22163-RA Gy_chrXVIII 14054887 14054887     1        *
## 14      FKBP3 gasAcul22163-RA Gy_chrXVIII 14054874 14054874     1        *
## 22      FKBP3 gasAcul22163-RA Gy_chrXVIII 14054881 14054881     1        *
## 32      FKBP3 gasAcul22163-RA Gy_chrXVIII 14054876 14054876     1        *
##                     DMS featureType
## 1  Gy_chrXVIII 14054992  intergenic
## 2  Gy_chrXVIII 14054930  intergenic
## 3  Gy_chrXVIII 14053519  intergenic
## 4  Gy_chrXVIII 14054980  intergenic
## 5  Gy_chrXVIII 14054965  intergenic
## 6  Gy_chrXVIII 14054983  intergenic
## 7  Gy_chrXVIII 14054932  intergenic
## 8  Gy_chrXVIII 14053497  intergenic
## 9  Gy_chrXVIII 14053503  intergenic
## 10 Gy_chrXVIII 14054800  intergenic
## 11 Gy_chrXVIII 14054854  intergenic
## 12 Gy_chrXVIII 14054831  intergenic
## 13 Gy_chrXVIII 14053506  intergenic
## 21 Gy_chrXVIII 14053508  intergenic
## 31 Gy_chrXVIII 14053521  intergenic
## 41 Gy_chrXVIII 14053542  intergenic
## 51 Gy_chrXVIII 14054809  intergenic
## 61 Gy_chrXVIII 14054878  intergenic
## 71 Gy_chrXVIII 14054885  intergenic
## 81 Gy_chrXVIII 14054887  intergenic
## 14 Gy_chrXVIII 14054874  intergenic
## 22 Gy_chrXVIII 14054881  intergenic
## 32 Gy_chrXVIII 14054876  intergenic
##                                                                                Note
## 1  Similar to FKBP3: Peptidyl-prolyl cis-trans isomerase FKBP3 (Bos taurus OX=9913)
## 2  Similar to FKBP3: Peptidyl-prolyl cis-trans isomerase FKBP3 (Bos taurus OX=9913)
## 3  Similar to FKBP3: Peptidyl-prolyl cis-trans isomerase FKBP3 (Bos taurus OX=9913)
## 4  Similar to FKBP3: Peptidyl-prolyl cis-trans isomerase FKBP3 (Bos taurus OX=9913)
## 5  Similar to FKBP3: Peptidyl-prolyl cis-trans isomerase FKBP3 (Bos taurus OX=9913)
## 6  Similar to FKBP3: Peptidyl-prolyl cis-trans isomerase FKBP3 (Bos taurus OX=9913)
## 7  Similar to FKBP3: Peptidyl-prolyl cis-trans isomerase FKBP3 (Bos taurus OX=9913)
## 8  Similar to FKBP3: Peptidyl-prolyl cis-trans isomerase FKBP3 (Bos taurus OX=9913)
## 9  Similar to FKBP3: Peptidyl-prolyl cis-trans isomerase FKBP3 (Bos taurus OX=9913)
## 10 Similar to FKBP3: Peptidyl-prolyl cis-trans isomerase FKBP3 (Bos taurus OX=9913)
## 11 Similar to FKBP3: Peptidyl-prolyl cis-trans isomerase FKBP3 (Bos taurus OX=9913)
## 12 Similar to FKBP3: Peptidyl-prolyl cis-trans isomerase FKBP3 (Bos taurus OX=9913)
## 13 Similar to FKBP3: Peptidyl-prolyl cis-trans isomerase FKBP3 (Bos taurus OX=9913)
## 21 Similar to FKBP3: Peptidyl-prolyl cis-trans isomerase FKBP3 (Bos taurus OX=9913)
## 31 Similar to FKBP3: Peptidyl-prolyl cis-trans isomerase FKBP3 (Bos taurus OX=9913)
## 41 Similar to FKBP3: Peptidyl-prolyl cis-trans isomerase FKBP3 (Bos taurus OX=9913)
## 51 Similar to FKBP3: Peptidyl-prolyl cis-trans isomerase FKBP3 (Bos taurus OX=9913)
## 61 Similar to FKBP3: Peptidyl-prolyl cis-trans isomerase FKBP3 (Bos taurus OX=9913)
## 71 Similar to FKBP3: Peptidyl-prolyl cis-trans isomerase FKBP3 (Bos taurus OX=9913)
## 81 Similar to FKBP3: Peptidyl-prolyl cis-trans isomerase FKBP3 (Bos taurus OX=9913)
## 14 Similar to FKBP3: Peptidyl-prolyl cis-trans isomerase FKBP3 (Bos taurus OX=9913)
## 22 Similar to FKBP3: Peptidyl-prolyl cis-trans isomerase FKBP3 (Bos taurus OX=9913)
## 32 Similar to FKBP3: Peptidyl-prolyl cis-trans isomerase FKBP3 (Bos taurus OX=9913)
##    Ontology_term start.gene end.gene strand.y       Parent nDMSperGene
## 1     GO:0003755   14048181 14049908        - gasAcul22163           8
## 2     GO:0003755   14048181 14049908        - gasAcul22163           8
## 3     GO:0003755   14048181 14049908        - gasAcul22163           8
## 4     GO:0003755   14048181 14049908        - gasAcul22163           8
## 5     GO:0003755   14048181 14049908        - gasAcul22163           8
## 6     GO:0003755   14048181 14049908        - gasAcul22163           8
## 7     GO:0003755   14048181 14049908        - gasAcul22163           8
## 8     GO:0003755   14048181 14049908        - gasAcul22163           8
## 9     GO:0003755   14048181 14049908        - gasAcul22163           4
## 10    GO:0003755   14048181 14049908        - gasAcul22163           4
## 11    GO:0003755   14048181 14049908        - gasAcul22163           4
## 12    GO:0003755   14048181 14049908        - gasAcul22163           4
## 13    GO:0003755   14048181 14049908        - gasAcul22163           8
## 21    GO:0003755   14048181 14049908        - gasAcul22163           8
## 31    GO:0003755   14048181 14049908        - gasAcul22163           8
## 41    GO:0003755   14048181 14049908        - gasAcul22163           8
## 51    GO:0003755   14048181 14049908        - gasAcul22163           8
## 61    GO:0003755   14048181 14049908        - gasAcul22163           8
## 71    GO:0003755   14048181 14049908        - gasAcul22163           8
## 81    GO:0003755   14048181 14049908        - gasAcul22163           8
## 14    GO:0003755   14048181 14049908        - gasAcul22163           3
## 22    GO:0003755   14048181 14049908        - gasAcul22163           3
## 32    GO:0003755   14048181 14049908        - gasAcul22163           3
##    geneLengthkb nDMSperGenekb ENTREZID             description
## 1         1.727          4.63     2287 FKBP prolyl isomerase 3
## 2         1.727          4.63     2287 FKBP prolyl isomerase 3
## 3         1.727          4.63     2287 FKBP prolyl isomerase 3
## 4         1.727          4.63     2287 FKBP prolyl isomerase 3
## 5         1.727          4.63     2287 FKBP prolyl isomerase 3
## 6         1.727          4.63     2287 FKBP prolyl isomerase 3
## 7         1.727          4.63     2287 FKBP prolyl isomerase 3
## 8         1.727          4.63     2287 FKBP prolyl isomerase 3
## 9         1.727          2.32     2287 FKBP prolyl isomerase 3
## 10        1.727          2.32     2287 FKBP prolyl isomerase 3
## 11        1.727          2.32     2287 FKBP prolyl isomerase 3
## 12        1.727          2.32     2287 FKBP prolyl isomerase 3
## 13        1.727          4.63     2287 FKBP prolyl isomerase 3
## 21        1.727          4.63     2287 FKBP prolyl isomerase 3
## 31        1.727          4.63     2287 FKBP prolyl isomerase 3
## 41        1.727          4.63     2287 FKBP prolyl isomerase 3
## 51        1.727          4.63     2287 FKBP prolyl isomerase 3
## 61        1.727          4.63     2287 FKBP prolyl isomerase 3
## 71        1.727          4.63     2287 FKBP prolyl isomerase 3
## 81        1.727          4.63     2287 FKBP prolyl isomerase 3
## 14        1.727          1.74     2287 FKBP prolyl isomerase 3
## 22        1.727          1.74     2287 FKBP prolyl isomerase 3
## 32        1.727          1.74     2287 FKBP prolyl isomerase 3
##                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                             summary
## 1  The protein encoded by this gene is a member of the immunophilin protein family, which play a role in immunoregulation and basic cellular processes involving protein folding and trafficking. This encoded protein is a cis-trans prolyl isomerase that binds the immunosuppressants FK506 and rapamycin, as well as histone deacetylases, the transcription factor YY1, casein kinase II, and nucleolin. It has a higher affinity for rapamycin than for FK506 and thus may be an important target molecule for immunosuppression by rapamycin. [provided by RefSeq, Sep 2008]
## 2  The protein encoded by this gene is a member of the immunophilin protein family, which play a role in immunoregulation and basic cellular processes involving protein folding and trafficking. This encoded protein is a cis-trans prolyl isomerase that binds the immunosuppressants FK506 and rapamycin, as well as histone deacetylases, the transcription factor YY1, casein kinase II, and nucleolin. It has a higher affinity for rapamycin than for FK506 and thus may be an important target molecule for immunosuppression by rapamycin. [provided by RefSeq, Sep 2008]
## 3  The protein encoded by this gene is a member of the immunophilin protein family, which play a role in immunoregulation and basic cellular processes involving protein folding and trafficking. This encoded protein is a cis-trans prolyl isomerase that binds the immunosuppressants FK506 and rapamycin, as well as histone deacetylases, the transcription factor YY1, casein kinase II, and nucleolin. It has a higher affinity for rapamycin than for FK506 and thus may be an important target molecule for immunosuppression by rapamycin. [provided by RefSeq, Sep 2008]
## 4  The protein encoded by this gene is a member of the immunophilin protein family, which play a role in immunoregulation and basic cellular processes involving protein folding and trafficking. This encoded protein is a cis-trans prolyl isomerase that binds the immunosuppressants FK506 and rapamycin, as well as histone deacetylases, the transcription factor YY1, casein kinase II, and nucleolin. It has a higher affinity for rapamycin than for FK506 and thus may be an important target molecule for immunosuppression by rapamycin. [provided by RefSeq, Sep 2008]
## 5  The protein encoded by this gene is a member of the immunophilin protein family, which play a role in immunoregulation and basic cellular processes involving protein folding and trafficking. This encoded protein is a cis-trans prolyl isomerase that binds the immunosuppressants FK506 and rapamycin, as well as histone deacetylases, the transcription factor YY1, casein kinase II, and nucleolin. It has a higher affinity for rapamycin than for FK506 and thus may be an important target molecule for immunosuppression by rapamycin. [provided by RefSeq, Sep 2008]
## 6  The protein encoded by this gene is a member of the immunophilin protein family, which play a role in immunoregulation and basic cellular processes involving protein folding and trafficking. This encoded protein is a cis-trans prolyl isomerase that binds the immunosuppressants FK506 and rapamycin, as well as histone deacetylases, the transcription factor YY1, casein kinase II, and nucleolin. It has a higher affinity for rapamycin than for FK506 and thus may be an important target molecule for immunosuppression by rapamycin. [provided by RefSeq, Sep 2008]
## 7  The protein encoded by this gene is a member of the immunophilin protein family, which play a role in immunoregulation and basic cellular processes involving protein folding and trafficking. This encoded protein is a cis-trans prolyl isomerase that binds the immunosuppressants FK506 and rapamycin, as well as histone deacetylases, the transcription factor YY1, casein kinase II, and nucleolin. It has a higher affinity for rapamycin than for FK506 and thus may be an important target molecule for immunosuppression by rapamycin. [provided by RefSeq, Sep 2008]
## 8  The protein encoded by this gene is a member of the immunophilin protein family, which play a role in immunoregulation and basic cellular processes involving protein folding and trafficking. This encoded protein is a cis-trans prolyl isomerase that binds the immunosuppressants FK506 and rapamycin, as well as histone deacetylases, the transcription factor YY1, casein kinase II, and nucleolin. It has a higher affinity for rapamycin than for FK506 and thus may be an important target molecule for immunosuppression by rapamycin. [provided by RefSeq, Sep 2008]
## 9  The protein encoded by this gene is a member of the immunophilin protein family, which play a role in immunoregulation and basic cellular processes involving protein folding and trafficking. This encoded protein is a cis-trans prolyl isomerase that binds the immunosuppressants FK506 and rapamycin, as well as histone deacetylases, the transcription factor YY1, casein kinase II, and nucleolin. It has a higher affinity for rapamycin than for FK506 and thus may be an important target molecule for immunosuppression by rapamycin. [provided by RefSeq, Sep 2008]
## 10 The protein encoded by this gene is a member of the immunophilin protein family, which play a role in immunoregulation and basic cellular processes involving protein folding and trafficking. This encoded protein is a cis-trans prolyl isomerase that binds the immunosuppressants FK506 and rapamycin, as well as histone deacetylases, the transcription factor YY1, casein kinase II, and nucleolin. It has a higher affinity for rapamycin than for FK506 and thus may be an important target molecule for immunosuppression by rapamycin. [provided by RefSeq, Sep 2008]
## 11 The protein encoded by this gene is a member of the immunophilin protein family, which play a role in immunoregulation and basic cellular processes involving protein folding and trafficking. This encoded protein is a cis-trans prolyl isomerase that binds the immunosuppressants FK506 and rapamycin, as well as histone deacetylases, the transcription factor YY1, casein kinase II, and nucleolin. It has a higher affinity for rapamycin than for FK506 and thus may be an important target molecule for immunosuppression by rapamycin. [provided by RefSeq, Sep 2008]
## 12 The protein encoded by this gene is a member of the immunophilin protein family, which play a role in immunoregulation and basic cellular processes involving protein folding and trafficking. This encoded protein is a cis-trans prolyl isomerase that binds the immunosuppressants FK506 and rapamycin, as well as histone deacetylases, the transcription factor YY1, casein kinase II, and nucleolin. It has a higher affinity for rapamycin than for FK506 and thus may be an important target molecule for immunosuppression by rapamycin. [provided by RefSeq, Sep 2008]
## 13 The protein encoded by this gene is a member of the immunophilin protein family, which play a role in immunoregulation and basic cellular processes involving protein folding and trafficking. This encoded protein is a cis-trans prolyl isomerase that binds the immunosuppressants FK506 and rapamycin, as well as histone deacetylases, the transcription factor YY1, casein kinase II, and nucleolin. It has a higher affinity for rapamycin than for FK506 and thus may be an important target molecule for immunosuppression by rapamycin. [provided by RefSeq, Sep 2008]
## 21 The protein encoded by this gene is a member of the immunophilin protein family, which play a role in immunoregulation and basic cellular processes involving protein folding and trafficking. This encoded protein is a cis-trans prolyl isomerase that binds the immunosuppressants FK506 and rapamycin, as well as histone deacetylases, the transcription factor YY1, casein kinase II, and nucleolin. It has a higher affinity for rapamycin than for FK506 and thus may be an important target molecule for immunosuppression by rapamycin. [provided by RefSeq, Sep 2008]
## 31 The protein encoded by this gene is a member of the immunophilin protein family, which play a role in immunoregulation and basic cellular processes involving protein folding and trafficking. This encoded protein is a cis-trans prolyl isomerase that binds the immunosuppressants FK506 and rapamycin, as well as histone deacetylases, the transcription factor YY1, casein kinase II, and nucleolin. It has a higher affinity for rapamycin than for FK506 and thus may be an important target molecule for immunosuppression by rapamycin. [provided by RefSeq, Sep 2008]
## 41 The protein encoded by this gene is a member of the immunophilin protein family, which play a role in immunoregulation and basic cellular processes involving protein folding and trafficking. This encoded protein is a cis-trans prolyl isomerase that binds the immunosuppressants FK506 and rapamycin, as well as histone deacetylases, the transcription factor YY1, casein kinase II, and nucleolin. It has a higher affinity for rapamycin than for FK506 and thus may be an important target molecule for immunosuppression by rapamycin. [provided by RefSeq, Sep 2008]
## 51 The protein encoded by this gene is a member of the immunophilin protein family, which play a role in immunoregulation and basic cellular processes involving protein folding and trafficking. This encoded protein is a cis-trans prolyl isomerase that binds the immunosuppressants FK506 and rapamycin, as well as histone deacetylases, the transcription factor YY1, casein kinase II, and nucleolin. It has a higher affinity for rapamycin than for FK506 and thus may be an important target molecule for immunosuppression by rapamycin. [provided by RefSeq, Sep 2008]
## 61 The protein encoded by this gene is a member of the immunophilin protein family, which play a role in immunoregulation and basic cellular processes involving protein folding and trafficking. This encoded protein is a cis-trans prolyl isomerase that binds the immunosuppressants FK506 and rapamycin, as well as histone deacetylases, the transcription factor YY1, casein kinase II, and nucleolin. It has a higher affinity for rapamycin than for FK506 and thus may be an important target molecule for immunosuppression by rapamycin. [provided by RefSeq, Sep 2008]
## 71 The protein encoded by this gene is a member of the immunophilin protein family, which play a role in immunoregulation and basic cellular processes involving protein folding and trafficking. This encoded protein is a cis-trans prolyl isomerase that binds the immunosuppressants FK506 and rapamycin, as well as histone deacetylases, the transcription factor YY1, casein kinase II, and nucleolin. It has a higher affinity for rapamycin than for FK506 and thus may be an important target molecule for immunosuppression by rapamycin. [provided by RefSeq, Sep 2008]
## 81 The protein encoded by this gene is a member of the immunophilin protein family, which play a role in immunoregulation and basic cellular processes involving protein folding and trafficking. This encoded protein is a cis-trans prolyl isomerase that binds the immunosuppressants FK506 and rapamycin, as well as histone deacetylases, the transcription factor YY1, casein kinase II, and nucleolin. It has a higher affinity for rapamycin than for FK506 and thus may be an important target molecule for immunosuppression by rapamycin. [provided by RefSeq, Sep 2008]
## 14 The protein encoded by this gene is a member of the immunophilin protein family, which play a role in immunoregulation and basic cellular processes involving protein folding and trafficking. This encoded protein is a cis-trans prolyl isomerase that binds the immunosuppressants FK506 and rapamycin, as well as histone deacetylases, the transcription factor YY1, casein kinase II, and nucleolin. It has a higher affinity for rapamycin than for FK506 and thus may be an important target molecule for immunosuppression by rapamycin. [provided by RefSeq, Sep 2008]
## 22 The protein encoded by this gene is a member of the immunophilin protein family, which play a role in immunoregulation and basic cellular processes involving protein folding and trafficking. This encoded protein is a cis-trans prolyl isomerase that binds the immunosuppressants FK506 and rapamycin, as well as histone deacetylases, the transcription factor YY1, casein kinase II, and nucleolin. It has a higher affinity for rapamycin than for FK506 and thus may be an important target molecule for immunosuppression by rapamycin. [provided by RefSeq, Sep 2008]
## 32 The protein encoded by this gene is a member of the immunophilin protein family, which play a role in immunoregulation and basic cellular processes involving protein folding and trafficking. This encoded protein is a cis-trans prolyl isomerase that binds the immunosuppressants FK506 and rapamycin, as well as histone deacetylases, the transcription factor YY1, casein kinase II, and nucleolin. It has a higher affinity for rapamycin than for FK506 and thus may be an important target molecule for immunosuppression by rapamycin. [provided by RefSeq, Sep 2008]
##    effect   TSSpos start_distToTSS end_distToTSS start.gene_distToTSS
## 1      G1 14049908            5084          5084                -1727
## 2      G1 14049908            5022          5022                -1727
## 3      G1 14049908            3611          3611                -1727
## 4      G1 14049908            5072          5072                -1727
## 5      G1 14049908            5057          5057                -1727
## 6      G1 14049908            5075          5075                -1727
## 7      G1 14049908            5024          5024                -1727
## 8      G1 14049908            3589          3589                -1727
## 9      G2 14049908            3595          3595                -1727
## 10     G2 14049908            4892          4892                -1727
## 11     G2 14049908            4946          4946                -1727
## 12     G2 14049908            4923          4923                -1727
## 13  G1+G2 14049908            3598          3598                -1727
## 21  G1+G2 14049908            3600          3600                -1727
## 31  G1+G2 14049908            3613          3613                -1727
## 41  G1+G2 14049908            3634          3634                -1727
## 51  G1+G2 14049908            4901          4901                -1727
## 61  G1+G2 14049908            4970          4970                -1727
## 71  G1+G2 14049908            4977          4977                -1727
## 81  G1+G2 14049908            4979          4979                -1727
## 14  G1:G2 14049908            4966          4966                -1727
## 22  G1:G2 14049908            4973          4973                -1727
## 32  G1:G2 14049908            4968          4968                -1727
##    end.gene_distToTSS
## 1                   0
## 2                   0
## 3                   0
## 4                   0
## 5                   0
## 6                   0
## 7                   0
## 8                   0
## 9                   0
## 10                  0
## 11                  0
## 12                  0
## 13                  0
## 21                  0
## 31                  0
## 41                  0
## 51                  0
## 61                  0
## 71                  0
## 81                  0
## 14                  0
## 22                  0
## 32                  0
# Zoom in 1 
P1 = P$plotGeneTarget + coord_cartesian(xlim=c(3588,3636)) + theme(legend.position = "none") +  
  ggtitle("") + labs(subtitle = "")+ theme(plot.background = element_rect(colour = "black", fill=NA, size=1))
# Zoom in 2
P2 = P$plotGeneTarget + coord_cartesian(xlim=c(4890,5060)) + theme(legend.position = "none") +  
  ggtitle("") + labs(subtitle = "")+ theme(plot.background = element_rect(colour = "black", fill=NA, size=1))

## For all BP, zoomed
Pbp1 = P$plotGeneTargetBP + theme(legend.position = "none") + coord_cartesian(xlim=c(3588,3636))+
  theme(plot.background = element_rect(colour = "black", fill=NA, size=1))
Pbp2 = P$plotGeneTargetBP + theme(legend.position = "none") + coord_cartesian(xlim=c(4890,5060))+ 
  theme(plot.background = element_rect(colour = "black", fill=NA, size=1))

fullplotFKBP3 = cowplot::plot_grid(P$plotGeneTarget,
                                   cowplot::plot_grid(P1, P2, nrow = 1, ncol =2), 
                                   cowplot::plot_grid(Pbp1, Pbp2, nrow = 1, ncol =2), 
                                   nrow = 3, rel_heights = c(1,1,3), labels = c("A", "B", "C"))
fullplotFKBP3

# Plot gene and zooms
pdf("../../dataOut/FKBP3_DMS.pdf", width = 15, height = 15)
fullplotFKBP3
dev.off()
## png 
##   2

3.2.3 Gene Ontology analysis OVERALL (all genes with G1, G2 or both effects):

# create gene universe
gene_universe <- data.frame(
  subsetByOverlaps(GRanges(annotGff3), GRanges(uniteCov14_G2_woSexAndUnknowChrOVERLAP))) %>% # subselect covered CpGs
  filter(lengths(Ontology_term)!=0) %>% # rm non existing GO terms
  filter(type %in% "gene")  %>% # keep all the 7416 genes with GO terms
  dplyr::select(c("Name", "Ontology_term")) %>%
  mutate(go_linkage_type = "IEA") %>% #NB: IEA but not necessarily true, it's from Interproscan after Maker. Sticklebacks (biomart) have 82701 IEA and 63 ISS.
  relocate("Ontology_term","go_linkage_type","Name") %>%
  unnest(Ontology_term) %>% # one GO per line (was a list before in this column)
  data.frame()

# Create gene set collection
goFrame <- GOFrame(gene_universe, organism="Gasterosteus aculeatus")
goAllFrame <- GOAllFrame(goFrame)
gsc_universe <- GeneSetCollection(goAllFrame, setType = GOCollection())

IMPORTANT NOTE from Mel: why conditional hypergeometric test? The GO ontology is set up as a directed acyclic graph, where a parent term is comprised of all its child terms. If you do a standard hypergeometric, you might e.g., find ‘positive regulation of kinase activity’ to be significant. If you then test ‘positive regulation of catalytic activity’, which is a parent term, then it might be significant as well, but only because of the terms coming from positive regulation of kinase activity.

The conditional hypergeometric takes this into account, and only uses those terms that were not already significant when testing a higher order (parent) term.

GO_G1only = makedfGO(DMS_G1onlyEffect_4BPmin_ANNOT, gene_universe, effect = "G1only")
GO_G2only = makedfGO(DMS_G2onlyEffect_4BPmin_ANNOT, gene_universe, effect = "G2only")
GO_G1G2addit = makedfGO(DMS_G1G2additiveEffect_4BPmin_ANNOT, gene_universe, effect = "G1+G2")
GO_G1G2inter = makedfGO(DMS_G1G2interactionEffect_4BPmin_ANNOT, gene_universe, effect = "G1:G2")

dfGO = rbind(GO_G1only, GO_G2only, GO_G1G2addit, GO_G1G2inter)

3.2.4 GO plot

GOplot <- dfGO %>% ggplot(aes(x=Effect, y = factor(GO.name))) +
  geom_point(aes(color = p.value.adjusted, size = genePercent)) +
  scale_color_gradient(name="adjusted\np-value", low = "red", high = "blue") +
  scale_size_continuous(name = "% of genes")+
  theme_bw() + ylab("") + xlab("Treatments comparison") +
  theme(legend.box.background = element_rect(fill = "#ebebeb", color = "#ebebeb"),
        legend.background = element_rect(fill = "#ebebeb", color = "#ebebeb"),
        legend.key = element_rect(fill = "#ebebeb", color = "#ebebeb"), legend.position="left") + # grey box for legend
  facet_grid(fct_inorder(GO.category)~., scales="free",space = "free")
GOplot

pdf(GOplot, file = "../../dataOut/GOplot4Venncat.pdf", width = 10, height = 25)
GOplot
dev.off()
## png 
##   2

3.3 Correlation between methylation (after PCA) and phenotype (nbr worms, BCI)

Approach:

For each DMS G1 and/or G2 effects:

  1. Extract methylation values: raw beta values at DMS shared by >4 (or more) BP

  2. PCA

  3. Extract axes 1 & 2

  4. Correlation with parasite load/BCI

  5. If result positive, annotate the CpG associated with significant axis

3.3.1 PCA based on all methylation values at DMS positions detected for PATERNAL effect, with imputation of missing values

# Make PCA and model lmer(BCI ~ PCA1*PCA2*No.Worms*PAT + (1|brotherPairID)+ (1|Sex), data=metadata)
RESPCA <- getPCACpG(DMSvec=unique(c(DMS_PaternalEffect_4BPmin, DMS_OffspringEffect_4BPmin)), effect="all effects")
## [1] "2272 DMS linked with all effects"
## [1] "The chosen model is:"
## BCI ~ PCA2 + No.Worms + PAT + (1 | brotherPairID) + (1 | Sex) + 
##     PCA2:PAT + No.Worms:PAT
## <environment: 0x55df205f9a18>
## [1] "1168 CpG sites most correlated (p < 0.05) with the first principal component"
## [1] "1135 CpG sites most correlated (p < 0.05) with the second principal component"
# 2272 DMS linked with all effects
# [1] "The chosen model is:"
# BCI ~ PCA2 + No.Worms + PAT + (1 | brotherPairID) + (1 | Sex) + 
#     PCA2:PAT + No.Worms:PAT
# <environment: 0x55fd9c8571e8>
# [1] "1168 CpG sites most correlated (p < 0.05) with the first principal component"
# [1] "1135 CpG sites most correlated (p < 0.05) with the second principal component"

formula(RESPCA$PCA_percAtDMS_imputed$modSel)
## BCI ~ PCA2 + No.Worms + PAT + (1 | brotherPairID) + (1 | Sex) + 
##     PCA2:PAT + No.Worms:PAT
## <environment: 0x55df205f9a18>
# The SECOND PCA axis is significant in BCI
# [1] "The chosen model is:"
BCI ~ PCA2 + No.Worms + PAT + (1 | brotherPairID) + (1 | Sex) + 
    PCA2:PAT + No.Worms:PAT
## BCI ~ PCA2 + No.Worms + PAT + (1 | brotherPairID) + (1 | Sex) + 
##     PCA2:PAT + No.Worms:PAT
### How much of the BCI variance is explained by each variables?
mod_noworms = lmer(BCI ~ PCA2 + PAT + PCA2:PAT + (1 | brotherPairID) + (1 | Sex), 
                   data = RESPCA$PCA_percAtDMS_imputed$metadata)
mod_noPAT = lmer(BCI ~ PCA2 + No.Worms + (1 | brotherPairID) + (1 | Sex), 
                 data = RESPCA$PCA_percAtDMS_imputed$metadata)
mod_noPCA2 = lmer(BCI ~ + No.Worms + PAT + No.Worms:PAT +(1 | brotherPairID) + (1 | Sex), 
                  data = RESPCA$PCA_percAtDMS_imputed$metadata)

# R2c conditional R2 value associated with fixed effects plus the random effects.
A = (MuMIn::r.squaredGLMM(RESPCA$PCA_percAtDMS_imputed$modSel)[2] -
       MuMIn::r.squaredGLMM(mod_noworms)[2])*100
B = (MuMIn::r.squaredGLMM(RESPCA$PCA_percAtDMS_imputed$modSel)[2] -
       MuMIn::r.squaredGLMM(mod_noPAT)[2])*100
C = (MuMIn::r.squaredGLMM(RESPCA$PCA_percAtDMS_imputed$modSel)[2] -
       MuMIn::r.squaredGLMM(mod_noPCA2)[2])*100
round(A, 2) #10.71% of the variance in associated with the parasite load (number of worms)
## [1] 10.71
round(B, 2) #22% of the variance in associated with the paternal treatment
## [1] 22
round(C, 2) #9.47% of the variance in associated with the second PCA axis
## [1] 9.47
### Plot of the model
phenoMethPlot <- plot(ggpredict(RESPCA$PCA_percAtDMS_imputed$modSel, terms = c("No.Worms", "PCA2", "PAT")), add.data = TRUE, alpha = .08) +
  theme_bw() +
  scale_color_gradient(low = "white", high = "red")+
  scale_fill_gradient(low = "white", high = "red") +
  ylab("Body Condition Index") + xlab("Number of worms")+
  ggtitle("Predicted values of Body Condition Index in offspring")
phenoMethPlot

# save
pdf(file = "../../dataOut/phenotypeMeth/phenoMethPlot_alleffects.pdf", width = 7, height = 5)
phenoMethPlot
dev.off()
## png 
##   2

3.3.2 Annotate the genes linked with axis 2 of the PCA

annotPCAaxis <- myHomebrewDMSannotation(DMSvec = paste(RESPCA$CpGPCA2$chr, RESPCA$CpGPCA2$end),
                                        myannotBed12 = annotBed12, myannotGff3 = annotGff3)
## [1] "check that these features are identical:"
## [1] "gasAcul16628-RA" "gasAcul16627-RA"
## [1] "check that these features are identical:"
## [1] "gasAcul04256-RA" "gasAcul04255-RA"
## [1] "check that these features are identical:"
## [1] "gasAcul04256-RA" "gasAcul04255-RA"
## [1] "check that these features are identical:"
## [1] "gasAcul15294-RA" "gasAcul15295-RA"
write.csv(annotPCAaxis %>% dplyr::select(c("GeneSymbol", "feature.name", "Note", "chrom", "nDMSperGenekb", "ENTREZID", "description", "summary"))%>% unique,
          "../../dataOut/annotPCA2_1135DMS.csv", row.names = F)

3.3.3 Plot annotated Manhattan plots TBC for those 1135 positions of PCA2!

P=plotManhattanGenesDMS(annotFile = annotPCAaxis, GYgynogff = GYgynogff)
P

pdf("../../dataOut/ManhattanPlots1135DMS.pdf", width = 10, height = 3)
P
dev.off()
## png 
##   2

3.3.4 GO term for these CpGs

GO_PCA2_1135DMS = makedfGO(annotPCAaxis, gene_universe, effect = "PCAaxis2")

GOplot <- GO_PCA2_1135DMS %>% ggplot(aes(x=Effect, y = factor(GO.name))) +
  geom_point(aes(color = p.value.adjusted, size = genePercent)) +
  scale_color_gradient(name="adjusted\np-value", low = "red", high = "blue") +
  scale_size_continuous(name = "% of genes")+
  theme_bw() + ylab("") + xlab("") +
  theme(legend.box.background = element_rect(fill = "#ebebeb", color = "#ebebeb"),
        legend.background = element_rect(fill = "#ebebeb", color = "#ebebeb"),
        legend.key = element_rect(fill = "#ebebeb", color = "#ebebeb"), legend.position="left") + # grey box for legend
  facet_grid(fct_inorder(GO.category)~., scales="free",space = "free")
GOplot

pdf(GOplot, file = "../../dataOut/GOplotPCA2.pdf", width = 6, height = 7)
GOplot
dev.off()
## png 
##   2

4 Differential methylations “Top down”: DM in parents -> how do they look in offspring?

4.0.1 TREATMENT EFFECT: Differential methylation between control and infected, within each parental treatment

## Features Annotation (use package genomation v1.24.0)
## NB Promoters are defined by options at genomation::readTranscriptFeatures function.
## The default option is to take -1000,+1000bp around the TSS and you can change that.
## -> following Heckwolf 2020 and Sagonas 2020, we consider 1500bp upstream and 500 bp downstream

## Parents comparison:
diffAnn_PAR = annotateWithGeneParts(as(DMSlist$DMS_15pc_G1_C_T,"GRanges"),annotBed12)
## Offspring from control parents comparison:
diffAnn_G2_controlG1 = annotateWithGeneParts(as(DMSlist$DMS_15pc_CC_CT,"GRanges"),annotBed12)
## Offspring from infected parents comparison:
diffAnn_G2_infectedG1 = annotateWithGeneParts(as(DMSlist$DMS_15pc_TC_TT,"GRanges"),annotBed12)
par(mfrow=c(1,3))
par(mar = c(.1,0.1,5,0.1)) # Set the margin on all sides to 2
## Parents comparison:
diffAnn_PAR = annotateWithGeneParts(as(DMSlist$DMS_15pc_G1_C_T,"GRanges"),annotBed12)
diffAnn_PAR
##   promoter       exon     intron intergenic 
##       8.58      15.84      34.13      45.72 
##   promoter       exon     intron intergenic 
##       8.58      13.19      32.51      45.72 
## promoter     exon   intron 
##     1.07     0.19     0.44 
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##       3    3020    8128   15141   19226  300247
genomation::plotTargetAnnotation(diffAnn_PAR,precedence=TRUE, main="DMS G1", cex.legend = 1, border="white")

## Offspring from control parents comparison:
diffAnn_G2_controlG1 = annotateWithGeneParts(as(DMSlist$DMS_15pc_CC_CT,"GRanges"),annotBed12)
diffAnn_G2_controlG1
##   promoter       exon     intron intergenic 
##      11.28      21.05      30.16      44.44 
##   promoter       exon     intron intergenic 
##      11.28      16.21      28.07      44.44 
## promoter     exon   intron 
##     0.38     0.07     0.14 
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##      11    2602    6295   14212   15906  261017
genomation::plotTargetAnnotation(diffAnn_G2_controlG1,precedence=TRUE, main="DMS G2-G1c", cex.legend = 1, border="white")
## Offspring from infected parents comparison:
diffAnn_G2_infectedG1 = annotateWithGeneParts(as(DMSlist$DMS_15pc_TC_TT,"GRanges"),annotBed12)
diffAnn_G2_infectedG1
##   promoter       exon     intron intergenic 
##      12.90      13.62      34.64      46.81 
##   promoter       exon     intron intergenic 
##      12.90       9.42      30.87      46.81 
## promoter     exon   intron 
##     0.28     0.03     0.08 
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##       9    2355    7150   12285   14773  109527
genomation::plotTargetAnnotation(diffAnn_G2_infectedG1,precedence=TRUE, main="DMS G2-G1i", cex.legend = 1, border="white")

par(mfrow=c(1,1))
## Run the function to get DMS info
DMS_info_G1 <- myDMSinfo(DMSlist$DMS_15pc_G1_C_T, uniteCov6_G1_woSexAndUnknowChrOVERLAP)
DMS_info_G2_G1c_final <- myDMSinfo(DMSlist$DMS_15pc_CC_CT, uniteCov14_G2_woSexAndUnknowChrOVERLAP)
DMS_info_G2_G1i_final <- myDMSinfo(DMSlist$DMS_15pc_TC_TT,uniteCov14_G2_woSexAndUnknowChrOVERLAP)

NB Kostas’ results: “We found a total of 1,973 CpG sites out of 1,172,887 CpGs (0.17%) across the genome that showed at least 15% differential fractional methylation (differentially methylated site [DMS]; q < 0.01) between infected and uninfected fish”

Here: we obtain out a total of 1001880 CpG sites: 3648 (0.36%) showed at least 15% differential fractional methylation (differentially methylated site [DMS]; q < 0.01) between infected and uninfected fish in the parental group; 1197 (0.12%) in the offspring from control parents comparison; 690 (0.07%) in the offspring from infected parents comparison.

4.0.1.1 Intersections of DMS: Venn diagrams

## Chi2 test: are the number of DMS from G2-G1C and G2-G1I overlapping with DMSpar statistically different?

A=length(intersect(DMS_info_G1$DMS,DMS_info_G2_G1c_final$DMS))
B=length(DMS_info_G2_G1c_final$DMS)
C=length(intersect(DMS_info_G1$DMS,DMS_info_G2_G1i_final$DMS))
D=length(DMS_info_G2_G1i_final$DMS)

Observed=matrix(c(A, B-A, C, D-C),nrow=2)
Observed
##      [,1] [,2]
## [1,]  106   59
## [2,] 1091  631
chisq.test(Observed)
## 
##  Pearson's Chi-squared test with Yates' continuity correction
## 
## data:  Observed
## X-squared = 0.019909, df = 1, p-value = 0.8878
## not statistically different
## output Venn diagrams
allVenn <- ggVennDiagram(list("DMS G1" = DMS_info_G1$DMS, "DMS G2-c" = DMS_info_G2_G1c_final$DMS, "DMS G2-i" = DMS_info_G2_G1i_final$DMS), label_alpha = 0) +
  scale_fill_gradient(low="white",high = "red")
hypoVenn <- ggVennDiagram(list("DMS G1\nhypo" = DMS_info_G1$DMS[DMS_info_G1$direction %in% "hypo"],
                               "DMS G2-c\nhypo" = DMS_info_G2_G1c_final$DMS[DMS_info_G2_G1c_final$direction %in% "hypo"],
                               "DMS G2-i\nhypo" = DMS_info_G2_G1i_final$DMS[DMS_info_G2_G1i_final$direction %in% "hypo"]), label_alpha = 0) +
  scale_fill_gradient(low="white",high = "red")
hyperVenn <- ggVennDiagram(list("DMS G1\nhyper" = DMS_info_G1$DMS[DMS_info_G1$direction %in% "hyper"],
                                "DMS G2-c\nhyper" = DMS_info_G2_G1c_final$DMS[DMS_info_G2_G1c_final$direction %in% "hyper"],
                                "DMS G2-i\nhyper" = DMS_info_G2_G1i_final$DMS[DMS_info_G2_G1i_final$direction %in% "hyper"]), label_alpha = 0) +
  scale_fill_gradient(low="white",high = "red")

ggarrange(allVenn,
          ggarrange(hypoVenn, hyperVenn, ncol = 2, legend = "none"),
          nrow = 2, widths = c(.5,1))

4.0.1.1.1 Venn with annotated features
runHyperHypoAnnot <- function(){
  par(mfrow=c(2,3))
  par(mar = c(.1,0.1,5,0.1)) # Set the margin on all sides to 2
  ####### HYPO
  ## Parents comparison:
  A = annotateWithGeneParts(
    as(DMSlist$DMS_15pc_G1_C_T[DMS_info_G1$direction %in% "hypo",],"GRanges"),annotBed12)
  genomation::plotTargetAnnotation(A,precedence=TRUE, main="DMS G1\nhypo",
                                   cex.legend = .4, border="white")
  ## Offspring from control parents comparison:
  B = annotateWithGeneParts(
    as(DMSlist$DMS_15pc_CC_CT[DMS_info_G2_G1c_final$direction %in% "hypo",],"GRanges"),annotBed12)
  genomation::plotTargetAnnotation(B,precedence=TRUE, main="DMS G2-G1c\nhypo",
                                   cex.legend = .4, border="white")
  ## Offspring from infected parents comparison:
  C = annotateWithGeneParts(
    as(DMSlist$DMS_15pc_TC_TT[DMS_info_G2_G1i_final$direction %in% "hypo",],"GRanges"),annotBed12)
  genomation::plotTargetAnnotation(C,precedence=TRUE, main="DMS G2-G1i\nhypo",
                                   cex.legend = .4, border="white")
  ####### HYPER
  ## Parents comparison:
  D = annotateWithGeneParts(
    as(DMSlist$DMS_15pc_G1_C_T[DMS_info_G1$direction %in% "hyper",],"GRanges"),annotBed12)
  genomation::plotTargetAnnotation(D,precedence=TRUE, main="DMS G1\nhyper",
                                   cex.legend = .4, border="white")
  ## Offspring from control parents comparison:
  E = annotateWithGeneParts(
    as(DMSlist$DMS_15pc_CC_CT[DMS_info_G2_G1c_final$direction %in% "hyper",],"GRanges"),annotBed12)
  genomation::plotTargetAnnotation(E,precedence=TRUE, main="DMS G2-G1c\nhyper",
                                   cex.legend = .4, border="white")
  ## Offspring from infected parents comparison:
  f = annotateWithGeneParts(
    as(DMSlist$DMS_15pc_TC_TT[DMS_info_G2_G1i_final$direction %in% "hyper",],"GRanges"),annotBed12)
  genomation::plotTargetAnnotation(f,precedence=TRUE, main="DMS G2-G1i\nhyper",
                                   cex.legend = .4, border="white")
  par(mfrow=c(1,1))
  return(list(G1hypo=A, G2G1chypo=B, G2G1ihypo=C, G1hyper=D, G2G1chyper=E, G2G1ihyper=f))
}

myannot=runHyperHypoAnnot()

############################################################
## Venn diagram of overlapping features by their annotation:
table(rowSums(as.data.frame(myannot$G1hypo@members))) # NB: some positions are labelled with several features!
## 
##   0   1   2   3 
## 559 566  49   2
## as in MBE 2021: "giving precedence to the following order promoters, exons,
## introns, and intergenic regions when features overlapped"

myAnnotateDMS <- function(DMS, annot){
  ## sanity check
  if (nrow(DMS) != nrow(annot)){"STOP error in arguments"}
  DMS$pos <- paste(DMS$chr, DMS$start, DMS$end)
  ## NB as in MBE 2021: "giving precedence to the following order promoters, exons,
  ## introns, and intergenic regions when features overlapped"
  DMS$feature <- NA
  ## 1. promoters
  DMS$feature[which(annot$prom == 1)] = "promoter"
  ## 2. exons
  DMS$feature[which(annot$exon == 1 & annot$prom ==0)] = "exon"
  ## 3. intron
  DMS$feature[which(annot$intro == 1 & annot$exon == 0 & annot$prom ==0)] = "intron"
  ## 4. intergenic regions
  DMS$feature[which(annot$intro == 0 & annot$exon == 0 & annot$prom ==0)] = "intergenic"
  return(DMS)
}

DMSlist$DMS_15pc_G1_C_T = myAnnotateDMS(DMSlist$DMS_15pc_G1_C_T, as.data.frame(diffAnn_PAR@members))
DMSlist$DMS_15pc_G1_C_T_HYPO = myAnnotateDMS(DMSlist$DMS_15pc_G1_C_T[DMS_info_G1$direction %in% "hypo",],
                                             as.data.frame(myannot$G1hypo@members))
DMSlist$DMS_15pc_G1_C_T_HYPER = myAnnotateDMS(DMSlist$DMS_15pc_G1_C_T[DMS_info_G1$direction %in% "hyper",],
                                              as.data.frame(myannot$G1hyper@members))

DMSlist$DMS_15pc_CC_CT = myAnnotateDMS(DMSlist$DMS_15pc_CC_CT, as.data.frame(diffAnn_G2_controlG1@members))
DMSlist$DMS_15pc_CC_CT_HYPO = myAnnotateDMS(DMSlist$DMS_15pc_CC_CT[DMS_info_G2_G1c_final$direction %in% "hypo",],
                                            as.data.frame(myannot$G2G1chypo@members))
DMSlist$DMS_15pc_CC_CT_HYPER = myAnnotateDMS(DMSlist$DMS_15pc_CC_CT[DMS_info_G2_G1c_final$direction %in% "hyper",],
                                             as.data.frame(myannot$G2G1chyper@members))

DMSlist$DMS_15pc_TC_TT = myAnnotateDMS(DMSlist$DMS_15pc_TC_TT, as.data.frame(diffAnn_G2_infectedG1@members))
DMSlist$DMS_15pc_TC_TT_HYPO = myAnnotateDMS(DMSlist$DMS_15pc_TC_TT[DMS_info_G2_G1i_final$direction %in% "hypo",],
                                            as.data.frame(myannot$G2G1ihypo@members))
DMSlist$DMS_15pc_TC_TT_HYPER = myAnnotateDMS(DMSlist$DMS_15pc_TC_TT[DMS_info_G2_G1i_final$direction %in% "hyper",],
                                             as.data.frame(myannot$G2G1ihyper@members))

## Make Venn diagram for each feature
getFeatureDFHYPO <- function(myfeat){
  a = DMSlist$DMS_15pc_G1_C_T_HYPO$pos[DMSlist$DMS_15pc_G1_C_T_HYPO$feature %in% myfeat]
  b = DMSlist$DMS_15pc_CC_CT_HYPO$pos[DMSlist$DMS_15pc_CC_CT_HYPO$feature %in% myfeat]
  c = DMSlist$DMS_15pc_TC_TT_HYPO$pos[DMSlist$DMS_15pc_TC_TT_HYPO$feature %in% myfeat]
  return(list(a=a,b=b,c=c))
}

getFeatureDFHYPER <- function(myfeat){
  a = DMSlist$DMS_15pc_G1_C_T_HYPER$pos[DMSlist$DMS_15pc_G1_C_T_HYPER$feature %in% myfeat]
  b = DMSlist$DMS_15pc_CC_CT_HYPER$pos[DMSlist$DMS_15pc_CC_CT_HYPER$feature %in% myfeat]
  c = DMSlist$DMS_15pc_TC_TT_HYPER$pos[DMSlist$DMS_15pc_TC_TT_HYPER$feature %in% myfeat]
  return(list(a=a,b=b,c=c))
}

getVenn <- function(feat, direction){
  if (direction == "hypo"){
    ggVennDiagram(list(A = getFeatureDFHYPO(feat)[["a"]],
                       B = getFeatureDFHYPO(feat)[["b"]],
                       C = getFeatureDFHYPO(feat)[["c"]]), label_alpha = 0,
                  category.names = c(paste0("DMS G1\nhypo\n", feat), paste0("DMS G2-c\nhypo\n", feat), paste0("DMS G2-i\nhypo\n", feat))) +
      scale_fill_gradient(low="white",high = "red")
  } else if (direction == "hyper"){
    ggVennDiagram(list(A = getFeatureDFHYPER(feat)[["a"]],
                       B = getFeatureDFHYPER(feat)[["b"]],
                       C = getFeatureDFHYPER(feat)[["c"]]), label_alpha = 0,
                  category.names = c(paste0("DMS G1\nhyper\n", feat), paste0("DMS G2-c\nhyper\n", feat), paste0("DMS G2-i\nhyper\n", feat))) +
      scale_fill_gradient(low="white",high = "red")
  }
}
ggarrange(getVenn("promoter", "hypo"), getVenn("exon", "hypo"),
          getVenn("intron", "hypo"), getVenn("intergenic", "hypo"),
          nrow = 2, ncol = 2)

ggarrange(getVenn("promoter", "hyper"), getVenn("exon", "hyper"),
          getVenn("intron", "hyper"), getVenn("intergenic", "hyper"),
          nrow = 2, ncol = 2)

4.0.1.2 Manhattan plot of DMS

## Parents trt-ctrl
# load annotation
annot_PAR <- as.data.frame(diffAnn_PAR@members)
makeManhattanPlots(DMSfile = DMSlist$DMS_15pc_G1_C_T, annotFile = annot_PAR, GYgynogff = GYgynogff,
                   mycols = c("red", "grey", "black", "green"), mytitle = "Manhattan plot of G1 DMS")

## G2-G1c trt-ctrl
# load annotation
annot_G2_G1c <- as.data.frame(diffAnn_G2_controlG1@members)
makeManhattanPlots(DMSfile = DMSlist$DMS_15pc_CC_CT, annotFile = annot_G2_G1c, GYgynogff = GYgynogff,
                   mycols = c("red", "grey", "black", "green"), mytitle = "Manhattan plot of G2-G1c DMS")

## G2-G1i trt-ctrl
# load annotation
annot_G2_G1i <- as.data.frame(diffAnn_G2_infectedG1@members)
makeManhattanPlots(DMSfile = DMSlist$DMS_15pc_TC_TT, annotFile = annot_G2_G1i, GYgynogff = GYgynogff,
                   mycols = c("red", "grey", "black", "green"), mytitle = "Manhattan plot of G2-G1i DMS")

## Outliers in Manhattan plot: 15% diff + 2SD
outliers_G1_final <- which(abs(DMSlist$DMS_15pc_G1_C_T$meth.diff) > 15 + 2*sd(abs(DMSlist$DMS_15pc_G1_C_T$meth.diff)))
outliers_annot_G1 <- as.data.frame(diffAnn_PAR@members)[outliers_G1_final,]
makeManhattanPlots(DMSfile = DMSlist$DMS_15pc_G1_C_T[outliers_G1_final, ],
                   annotFile = outliers_annot_G1, GYgynogff = GYgynogff,
                   mycols = c("red", "grey", "black", "green"), mytitle = "Manhattan plot of G1 DMS")

outliers_G2_G1c_final <- which(abs(DMSlist$DMS_15pc_CC_CT$meth.diff) > 15 + 2*sd(abs(DMSlist$DMS_15pc_CC_CT$meth.diff)))
outliers_annot_G2_G1c <- as.data.frame(diffAnn_G2_controlG1@members)[outliers_G2_G1c_final,]
makeManhattanPlots(DMSfile = DMSlist$DMS_15pc_CC_CT[outliers_G2_G1c_final, ],
                   annotFile = outliers_annot_G2_G1c, GYgynogff = GYgynogff,
                   mycols = c("red", "grey", "black", "green"), mytitle = "Manhattan plot of G2-G1c DMS")

outliers_G2_G1i_final <- which(abs(DMSlist$DMS_15pc_TC_TT$meth.diff) > 15 + 2*sd(abs(DMSlist$DMS_15pc_TC_TT$meth.diff)))
outliers_annot_G2_G1i <- as.data.frame(diffAnn_G2_infectedG1@members)[outliers_G2_G1i_final,]
makeManhattanPlots(DMSfile = DMSlist$DMS_15pc_TC_TT[outliers_G2_G1i_final, ],
                   annotFile = outliers_annot_G2_G1i, GYgynogff = GYgynogff,
                   mycols = c("red", "grey", "black", "green"), mytitle = "Manhattan plot of G2-G1i DMS")

4.1 I. Focus on CpG positions at parental (Ctrl-Inf) DMS

Question: how are the beta values in the different groups at the parental DMS?

##############
## Prepare dataset
PM_G1 <- getPMdataset(uniteCov = uniteCov6_G1_woSexAndUnknowChrOVERLAP, MD = fullMetadata_PAR, gener="parents")
PM_G2 <- getPMdataset(uniteCov = uniteCov14_G2_woSexAndUnknowChrOVERLAP, MD = fullMetadata_OFFS, gener="offspring")

4.1.1 What is the relative contribution of methylation to brother pair & paternal treatment?

Test of VCA: variance component analysis https://cran.r-project.org/web/packages/VCA/vignettes/VCA_package_vignette.html

4.1.1.1 Hypo methylation

PM_G2_mean_hypo <- PM_G2[PM_G2$hypohyper %in% "hypo", ] %>%
  group_by(brotherPairID, G1_trt, G2_trt, ID) %>%
  dplyr::summarize(MeanBetaValue = mean(BetaValue, na.rm=TRUE)) %>% data.frame()

varPlot(form = MeanBetaValue~(G1_trt* G2_trt*brotherPairID), Data = PM_G2_mean_hypo,
        MeanLine=list(var=c("G1_trt", "G2_trt"),
                      col=c("white", "blue"), lwd=c(2,2)),
        BG=list(var="G2_trt", col=paste0("gray", c(80, 90))),
        YLabel=list(cex = .8, text="Mean beta value at parDMS \n hypomethylated upon infection"))

myfitVCA_hypo <- fitVCA(form = MeanBetaValue~(G1_trt* G2_trt*brotherPairID), Data = PM_G2_mean_hypo)

### Real values
trtEffect <- sum(myfitVCA_hypo$aov.tab[2:4, 5])
genEffect <- sum(myfitVCA_hypo$aov.tab[5:8, 5])
error <- sum(myfitVCA_hypo$aov.tab[9, 5])
realValHypoVCA <- data.frame(trtEffect=trtEffect, genEffect=genEffect,error=error)

### Randomisation
myrandomVCA <- function(df=PM_G2_mean_hypo){
  randomDF = df
  randomDF$G1_trt = sample(PM_G2_mean_hypo$G1_trt, replace = F)
  randomDF$G2_trt = sample(PM_G2_mean_hypo$G2_trt, replace = F)
  randomDF$brotherPairID = sample(PM_G2_mean_hypo$brotherPairID, replace = F)
  myfitVCA <- fitVCA(form = MeanBetaValue~(G1_trt* G2_trt*brotherPairID), Data = randomDF)
  trtEffect <- sum(myfitVCA$aov.tab[2:4, 5])
  genEffect <- sum(myfitVCA$aov.tab[5:8, 5])
  error <- sum(myfitVCA$aov.tab[9, 5])
  return(data.frame(trtEffect=trtEffect, genEffect=genEffect,error=error))
}

# randomHypoVCA = do.call(rbind, lapply(1:1000, function(x) {
#   df=myrandomVCA(PM_G2_mean_hypo)
#   df$rep=x
#   return(df)}))

# randomHypoVCA = melt(randomHypoVCA, id.vars = "rep")
# saveRDS(randomHypoVCA, file = "Rdata/randomHypoVCA.RDS")
randomHypoVCA <- readRDS(file = "Rdata/randomHypoVCA.RDS")
df2=reshape2::melt(realValHypoVCA)

sumDF <- randomHypoVCA %>%
  group_by(variable) %>%
  dplyr::summarize(value = mean(value)) %>% data.frame()

ggplot(randomHypoVCA, aes(x=variable, y=value))+
  geom_boxplot()+
  geom_jitter(width=.1, alpha=.2)+
  geom_point(data = df2, col = "red", size = 6)+
  geom_text(data=sumDF, aes(label=round(value)), col="white")+
  geom_text(data = df2, aes(label=round(value)), col="white")+
  theme_cleveland()+
  ggtitle("VCA with bootstrap N=1000 at hypo-parDMS", subtitle = "red: observed values")

# estimate 95% confidence intervals, request CI for
# all variance components via 'VarVC=TRUE'
VCAinference(myfitVCA_hypo, VarVC=TRUE)
## 
## 
## 
## Inference from (V)ariance (C)omponent (A)nalysis
## ------------------------------------------------
## 
## > VCA Result:
## -------------
## 
##   Name                        DF     SS       MS       VC      %Total  SD    
## 1 total                       6.6733                   15.8572 100     3.9821
## 2 G1_trt                      1      319.6035 319.6035 4.7515  29.9646 2.1798
## 3 G2_trt                      1      3.8987   3.8987   0.0712  0.4491  0.2669
## 4 G1_trt:G2_trt               1      1.5539   1.5539   0*      0*      0*    
## 5 brotherPairID               7      129.185  18.455   0*      0*      0*    
## 6 G1_trt:brotherPairID        7      395.9735 56.5676  7.8576  49.5525 2.8031
## 7 G2_trt:brotherPairID        7      9.9214   1.4173   0*      0*      0*    
## 8 G1_trt:G2_trt:brotherPairID 7      20.2165  2.8881   0*      0*      0*    
## 9 error                       79     250.9663 3.1768   3.1768  20.0337 1.7824
##   CV[%]  Var(VC)
## 1 6.6047        
## 2 3.6154 66.6443
## 3 0.4426 0.0124 
## 4 0*     0.0096 
## 5 0*     5.4751 
## 6 4.6493 19.7869
## 7 0*     0.068  
## 8 0*     0.2383 
## 9 2.9562 0.2555 
## 
## Mean: 60.2922 (N = 111) 
## 
## Experimental Design: unbalanced  |  Method: ANOVA | * VC set to 0 | adapted MS used for total DF
## 
## 
## > VC:
## -----
##                             Estimate CI LCL CI UCL  One-Sided LCL One-Sided UCL
## total                       15.8572  6.824  68.824  7.787         53.1883      
## G1_trt                      4.7515   0*     20.7519 0*            18.1795      
## G2_trt                      0.0712   0*     0.2897  0*            0.2545       
## G1_trt:G2_trt               0                                                  
## brotherPairID               0                                                  
## G1_trt:brotherPairID        7.8576   0*     16.576  0.5409        15.1744      
## G2_trt:brotherPairID        0                                                  
## G1_trt:G2_trt:brotherPairID 0                                                  
## error                       3.1768   2.3794 4.457   2.491         4.2163       
## 
## > SD:
## -----
##                             Estimate CI LCL CI UCL One-Sided LCL One-Sided UCL
## total                       3.9821   2.6123 8.296  2.7905        7.293        
## G1_trt                      2.1798   0*     4.5554 0*            4.2637       
## G2_trt                      0.2669   0*     0.5382 0*            0.5045       
## G1_trt:G2_trt               0                                                 
## brotherPairID               0                                                 
## G1_trt:brotherPairID        2.8031   0*     4.0714 0.7355        3.8954       
## G2_trt:brotherPairID        0                                                 
## G1_trt:G2_trt:brotherPairID 0                                                 
## error                       1.7824   1.5425 2.1112 1.5783        2.0534       
## 
## > CV[%]:
## --------
##                             Estimate CI LCL CI UCL  One-Sided LCL One-Sided UCL
## total                       6.6047   4.3327 13.7597 4.6283        12.0961      
## G1_trt                      3.6154   0*     7.5556  0*            7.0718       
## G2_trt                      0.4426   0*     0.8926  0*            0.8368       
## G1_trt:G2_trt               0                                                  
## brotherPairID               0                                                  
## G1_trt:brotherPairID        4.6493   0*     6.7527  1.2199        6.4609       
## G2_trt:brotherPairID        0                                                  
## G1_trt:G2_trt:brotherPairID 0                                                  
## error                       2.9562   2.5584 3.5015  2.6177        3.4057       
## 
## 
## 95% Confidence Level  |  CIs for negative VCs excluded  | * CI-limits constrained to be >= 0
## SAS PROC MIXED method used for computing CIs

4.1.1.2 Hyper methylation

PM_G2_mean_hyper <- PM_G2[PM_G2$hypohyper %in% "hyper", ] %>%
  group_by(brotherPairID, G1_trt, G2_trt, ID) %>%
  dplyr::summarize(MeanBetaValue = mean(BetaValue, na.rm=TRUE)) %>% data.frame()

varPlot(form = MeanBetaValue~(G1_trt* G2_trt*brotherPairID), Data = PM_G2_mean_hyper,
        MeanLine=list(var=c("G1_trt", "G2_trt"),
                      col=c("white", "blue"), lwd=c(2,2)),
        BG=list(var="G2_trt", col=paste0("gray", c(80, 90))),
        YLabel=list(cex = .8, text="Mean beta value at parDMS \n hypermethylated upon infection"))

myfitVCA_hyper <- fitVCA(form = MeanBetaValue~(G1_trt* G2_trt*brotherPairID), Data = PM_G2_mean_hyper)

### Real values
trtEffect <- sum(myfitVCA_hyper$aov.tab[2:4, 5])
genEffect <- sum(myfitVCA_hyper$aov.tab[5:8, 5])
error <- sum(myfitVCA_hyper$aov.tab[9, 5])
realValHyperVCA <- data.frame(trtEffect=trtEffect, genEffect=genEffect,error=error)

### Randomisation
# randomHyperVCA = do.call(rbind, lapply(1:1000, function(x) {
#   df=myrandomVCA(PM_G2_mean_hyper)
#   df$rep=x
#   return(df)}))
#
# randomHyperVCA = melt(randomHyperVCA, id.vars = "rep")
# saveRDS(randomHyperVCA, file = "Rdata/randomHyperVCA.RDS")
randomHyperVCA <- readRDS(file = "Rdata/randomHyperVCA.RDS")
df2=reshape2::melt(realValHyperVCA)

sumDF <- randomHyperVCA %>%
  group_by(variable) %>%
  dplyr::summarize(value = mean(value)) %>% data.frame()

ggplot(randomHyperVCA, aes(x=variable, y=value))+
  geom_boxplot()+
  geom_jitter(width=.1, alpha=.2)+
  geom_point(data = df2, col = "red", size = 6)+
  geom_text(data=sumDF, aes(label=round(value)), col="white")+
  geom_text(data = df2, aes(label=round(value)), col="white")+
  theme_cleveland()+
  ggtitle("VCA with bootstrap N=1000 at hyper-parDMS", subtitle = "red: observed values")

# estimate 95% confidence intervals, request CI for
# all variance components via 'VarVC=TRUE'
VCAinference(myfitVCA_hypo, VarVC=TRUE)
## 
## 
## 
## Inference from (V)ariance (C)omponent (A)nalysis
## ------------------------------------------------
## 
## > VCA Result:
## -------------
## 
##   Name                        DF     SS       MS       VC      %Total  SD    
## 1 total                       6.6733                   15.8572 100     3.9821
## 2 G1_trt                      1      319.6035 319.6035 4.7515  29.9646 2.1798
## 3 G2_trt                      1      3.8987   3.8987   0.0712  0.4491  0.2669
## 4 G1_trt:G2_trt               1      1.5539   1.5539   0*      0*      0*    
## 5 brotherPairID               7      129.185  18.455   0*      0*      0*    
## 6 G1_trt:brotherPairID        7      395.9735 56.5676  7.8576  49.5525 2.8031
## 7 G2_trt:brotherPairID        7      9.9214   1.4173   0*      0*      0*    
## 8 G1_trt:G2_trt:brotherPairID 7      20.2165  2.8881   0*      0*      0*    
## 9 error                       79     250.9663 3.1768   3.1768  20.0337 1.7824
##   CV[%]  Var(VC)
## 1 6.6047        
## 2 3.6154 66.6443
## 3 0.4426 0.0124 
## 4 0*     0.0096 
## 5 0*     5.4751 
## 6 4.6493 19.7869
## 7 0*     0.068  
## 8 0*     0.2383 
## 9 2.9562 0.2555 
## 
## Mean: 60.2922 (N = 111) 
## 
## Experimental Design: unbalanced  |  Method: ANOVA | * VC set to 0 | adapted MS used for total DF
## 
## 
## > VC:
## -----
##                             Estimate CI LCL CI UCL  One-Sided LCL One-Sided UCL
## total                       15.8572  6.824  68.824  7.787         53.1883      
## G1_trt                      4.7515   0*     20.7519 0*            18.1795      
## G2_trt                      0.0712   0*     0.2897  0*            0.2545       
## G1_trt:G2_trt               0                                                  
## brotherPairID               0                                                  
## G1_trt:brotherPairID        7.8576   0*     16.576  0.5409        15.1744      
## G2_trt:brotherPairID        0                                                  
## G1_trt:G2_trt:brotherPairID 0                                                  
## error                       3.1768   2.3794 4.457   2.491         4.2163       
## 
## > SD:
## -----
##                             Estimate CI LCL CI UCL One-Sided LCL One-Sided UCL
## total                       3.9821   2.6123 8.296  2.7905        7.293        
## G1_trt                      2.1798   0*     4.5554 0*            4.2637       
## G2_trt                      0.2669   0*     0.5382 0*            0.5045       
## G1_trt:G2_trt               0                                                 
## brotherPairID               0                                                 
## G1_trt:brotherPairID        2.8031   0*     4.0714 0.7355        3.8954       
## G2_trt:brotherPairID        0                                                 
## G1_trt:G2_trt:brotherPairID 0                                                 
## error                       1.7824   1.5425 2.1112 1.5783        2.0534       
## 
## > CV[%]:
## --------
##                             Estimate CI LCL CI UCL  One-Sided LCL One-Sided UCL
## total                       6.6047   4.3327 13.7597 4.6283        12.0961      
## G1_trt                      3.6154   0*     7.5556  0*            7.0718       
## G2_trt                      0.4426   0*     0.8926  0*            0.8368       
## G1_trt:G2_trt               0                                                  
## brotherPairID               0                                                  
## G1_trt:brotherPairID        4.6493   0*     6.7527  1.2199        6.4609       
## G2_trt:brotherPairID        0                                                  
## G1_trt:G2_trt:brotherPairID 0                                                  
## error                       2.9562   2.5584 3.5015  2.6177        3.4057       
## 
## 
## 95% Confidence Level  |  CIs for negative VCs excluded  | * CI-limits constrained to be >= 0
## SAS PROC MIXED method used for computing CIs

4.2 DMS values in parents

parmod <- lmer(data = PM_G1, BetaValue ~ meth.diff.parentals : Treatment + (1|CpGSite) + (1|brotherPairID))

## check normality of residuals assumption
qqnorm(resid(parmod))
qqline(resid(parmod))

pred <- ggpredict(parmod, terms = c("meth.diff.parentals", "Treatment"))
plot(pred, add.data = T)+
  scale_color_manual(values = c("black", "red"))+
  scale_y_continuous(name = "Beta values")+
  scale_x_continuous(name = "Methylation difference between infected and control parents in percentage")+
  ggtitle("Predicted methylation ratio (Beta) values in parents\n as a function of differential methylation between exposed and control groups")+
  theme_bw()

4.2.1 Linear model: does the beta value of offspring at DMS depends on treatment Parent x Offspring?

modFull <- lmer(BetaValue ~ (G1_trt * G2_trt):hypohyper + (1|CpGSite) + (1|Sex) + (1|brotherPairID),data = PM_G2, REML = F) # REML =F for model comparison
mod_noG1trt <- lmer(BetaValue ~ G2_trt:hypohyper + (1|CpGSite)+ (1|Sex) + (1|brotherPairID), data = PM_G2, REML = F)
mod_noG2trt <-lmer(BetaValue ~ G1_trt:hypohyper + (1|CpGSite) + (1|Sex) + (1|brotherPairID), data = PM_G2, REML = F)
mod_noInteractions <- lmer(BetaValue ~ (G1_trt + G2_trt):hypohyper + (1|CpGSite) + (1|Sex) + (1|brotherPairID), data = PM_G2, REML = F)
mod_noHypoHyper <- lmer(BetaValue ~ (G1_trt * G2_trt) + (1|CpGSite) + (1|Sex) + (1|brotherPairID), data = PM_G2, REML = F)

## check normality of residuals assumption
qqnorm(resid(modFull))
qqline(resid(modFull))

## Likelihood ratio tests for all variables:
lrtest(modFull, mod_noG1trt) # G1 trt is VERY VERY significant (LRT: χ² (4) = 1163.6, p < 0.001)
## Likelihood ratio test
## 
## Model 1: BetaValue ~ (G1_trt * G2_trt):hypohyper + (1 | CpGSite) + (1 | 
##     Sex) + (1 | brotherPairID)
## Model 2: BetaValue ~ G2_trt:hypohyper + (1 | CpGSite) + (1 | Sex) + (1 | 
##     brotherPairID)
##   #Df   LogLik Df  Chisq Pr(>Chisq)    
## 1  12 -1515719                         
## 2   8 -1516301 -4 1163.6  < 2.2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
lrtest(modFull, mod_noG2trt) # G2 trt is VERY VERY significant (LRT: χ² (4) = 30.02, p < 0.001) NB that changed when brotherpair is used instead of family!
## Likelihood ratio test
## 
## Model 1: BetaValue ~ (G1_trt * G2_trt):hypohyper + (1 | CpGSite) + (1 | 
##     Sex) + (1 | brotherPairID)
## Model 2: BetaValue ~ G1_trt:hypohyper + (1 | CpGSite) + (1 | Sex) + (1 | 
##     brotherPairID)
##   #Df   LogLik Df  Chisq Pr(>Chisq)    
## 1  12 -1515719                         
## 2   8 -1515734 -4 30.022  4.844e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
lrtest(modFull, mod_noInteractions) # interactions are significant (LRT: χ² (2) = 9.21, p < 0.01)
## Likelihood ratio test
## 
## Model 1: BetaValue ~ (G1_trt * G2_trt):hypohyper + (1 | CpGSite) + (1 | 
##     Sex) + (1 | brotherPairID)
## Model 2: BetaValue ~ (G1_trt + G2_trt):hypohyper + (1 | CpGSite) + (1 | 
##     Sex) + (1 | brotherPairID)
##   #Df   LogLik Df Chisq Pr(>Chisq)   
## 1  12 -1515719                       
## 2  10 -1515724 -2 9.211   0.009997 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
lrtest(modFull, mod_noHypoHyper) # hypo/hyper VERY VERY significant (LRT: χ² (4) = 1140, p < 0.001)
## Likelihood ratio test
## 
## Model 1: BetaValue ~ (G1_trt * G2_trt):hypohyper + (1 | CpGSite) + (1 | 
##     Sex) + (1 | brotherPairID)
## Model 2: BetaValue ~ (G1_trt * G2_trt) + (1 | CpGSite) + (1 | Sex) + (1 | 
##     brotherPairID)
##   #Df   LogLik Df  Chisq Pr(>Chisq)    
## 1  12 -1515719                         
## 2   8 -1516289 -4 1140.4  < 2.2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Post-hoc tests between treatments
modFull <- lmer(BetaValue ~ (G1_trt * G2_trt):hypohyper + (1|CpGSite) + (1|Sex) + (1|brotherPairID),data = PM_G2)
modFull_emmeans <- emmeans(modFull, list(pairwise ~ (G1_trt:G2_trt):hypohyper), adjust = "tukey")
modFull_emmeans
## $`emmeans of G1_trt, G2_trt, hypohyper`
##  G1_trt  G2_trt  hypohyper emmean    SE  df asymp.LCL asymp.UCL
##  Control Control hypo        62.1 0.960 Inf      60.2      64.0
##  Exposed Control hypo        58.8 0.959 Inf      57.0      60.7
##  Control Exposed hypo        62.3 0.960 Inf      60.4      64.2
##  Exposed Exposed hypo        58.9 0.960 Inf      57.1      60.8
##  Control Control hyper       56.6 0.872 Inf      54.9      58.3
##  Exposed Control hyper       58.7 0.872 Inf      57.0      60.4
##  Control Exposed hyper       55.8 0.872 Inf      54.1      57.6
##  Exposed Exposed hyper       58.6 0.872 Inf      56.9      60.3
## 
## Degrees-of-freedom method: asymptotic 
## Confidence level used: 0.95 
## 
## $`pairwise differences of G1_trt, G2_trt, hypohyper`
##  contrast                                      estimate    SE  df z.ratio
##  Control Control hypo - Exposed Control hypo     3.2670 0.199 Inf  16.439
##  Control Control hypo - Control Exposed hypo    -0.2024 0.205 Inf  -0.989
##  Control Control hypo - Exposed Exposed hypo     3.1756 0.202 Inf  15.735
##  Control Control hypo - Control Control hyper    5.5292 0.672 Inf   8.226
##  Control Control hypo - Exposed Control hyper    3.3925 0.672 Inf   5.049
##  Control Control hypo - Control Exposed hyper    6.2691 0.673 Inf   9.317
##  Control Control hypo - Exposed Exposed hyper    3.5420 0.672 Inf   5.268
##  Exposed Control hypo - Control Exposed hypo    -3.4694 0.202 Inf -17.160
##  Exposed Control hypo - Exposed Exposed hypo    -0.0913 0.199 Inf  -0.460
##  Exposed Control hypo - Control Control hyper    2.2623 0.671 Inf   3.369
##  Exposed Control hypo - Exposed Control hyper    0.1256 0.671 Inf   0.187
##  Exposed Control hypo - Control Exposed hyper    3.0021 0.672 Inf   4.467
##  Exposed Control hypo - Exposed Exposed hyper    0.2751 0.671 Inf   0.410
##  Control Exposed hypo - Exposed Exposed hypo     3.3780 0.205 Inf  16.479
##  Control Exposed hypo - Control Control hyper    5.7317 0.673 Inf   8.514
##  Control Exposed hypo - Exposed Control hyper    3.5950 0.673 Inf   5.342
##  Control Exposed hypo - Control Exposed hyper    6.4715 0.674 Inf   9.608
##  Control Exposed hypo - Exposed Exposed hyper    3.7444 0.673 Inf   5.561
##  Exposed Exposed hypo - Control Control hyper    2.3536 0.672 Inf   3.501
##  Exposed Exposed hypo - Exposed Control hyper    0.2169 0.672 Inf   0.323
##  Exposed Exposed hypo - Control Exposed hyper    3.0935 0.673 Inf   4.597
##  Exposed Exposed hypo - Exposed Exposed hyper    0.3664 0.672 Inf   0.545
##  Control Control hyper - Exposed Control hyper  -2.1367 0.137 Inf -15.618
##  Control Control hyper - Control Exposed hyper   0.7398 0.141 Inf   5.251
##  Control Control hyper - Exposed Exposed hyper  -1.9872 0.139 Inf -14.332
##  Exposed Control hyper - Control Exposed hyper   2.8765 0.140 Inf  20.557
##  Exposed Control hyper - Exposed Exposed hyper   0.1495 0.137 Inf   1.091
##  Control Exposed hyper - Exposed Exposed hyper  -2.7271 0.141 Inf -19.290
##  p.value
##   <.0001
##   0.9761
##   <.0001
##   <.0001
##   <.0001
##   <.0001
##   <.0001
##   <.0001
##   0.9998
##   0.0172
##   1.0000
##   0.0002
##   0.9999
##   <.0001
##   <.0001
##   <.0001
##   <.0001
##   <.0001
##   0.0110
##   1.0000
##   0.0001
##   0.9994
##   <.0001
##   <.0001
##   <.0001
##   <.0001
##   0.9589
##   <.0001
## 
## Degrees-of-freedom method: asymptotic 
## P value adjustment: tukey method for comparing a family of 8 estimates
P1 <- plot(modFull_emmeans, by = "hypohyper", comparisons = TRUE) +
  # coord_flip()+
  theme_bw() +
  ggtitle("Estimated marginal means of methylation ratio (beta)\n of offspring at parental DMS")+
  theme(legend.position = "none", axis.title.x = element_blank()) +
  scale_x_continuous("Beta value (methylation ratio)", limits = c(47,69.5))

## NB: Comparison arrows: https://cran.r-project.org/web/packages/emmeans/vignettes/xplanations.html
## two estimated marginal means (EMMs) differ significantly if, and only if, their respective comparison arrows do not overlap
## These comparison arrows are decidedly not the same as confidence intervals.
## Confidence intervals for EMMs are based on the statistical properties of the individual EMMs, whereas comparison arrows
## are based on the statistical properties of differences of EMMs.

## Add the PARENTAL DMS value
## Same test on ALL, G1 and G2 fish
modFullG1 <- lmer(BetaValue ~ G1_trt:hypohyper + (1|CpGSite) + (1|brotherPairID), data = PM_G1)

modFullG1_emmeans <- emmeans(modFullG1, list(pairwise ~ G1_trt:hypohyper), adjust = "tukey")
modFullG1_emmeans
## $`emmeans of G1_trt, hypohyper`
##  G1_trt  hypohyper emmean    SE  df asymp.LCL asymp.UCL
##  Control hypo        66.9 0.657 Inf      65.7      68.2
##  Exposed hypo        49.1 0.661 Inf      47.8      50.4
##  Control hyper       48.6 0.534 Inf      47.6      49.6
##  Exposed hyper       65.8 0.536 Inf      64.7      66.8
## 
## Degrees-of-freedom method: asymptotic 
## Confidence level used: 0.95 
## 
## $`pairwise differences of G1_trt, hypohyper`
##  1                             estimate    SE  df z.ratio p.value
##  Control hypo - Exposed hypo     17.889 0.303 Inf  59.074  <.0001
##  Control hypo - Control hyper    18.343 0.643 Inf  28.539  <.0001
##  Control hypo - Exposed hyper     1.179 0.645 Inf   1.829  0.2597
##  Exposed hypo - Control hyper     0.454 0.647 Inf   0.701  0.8966
##  Exposed hypo - Exposed hyper   -16.711 0.649 Inf -25.761  <.0001
##  Control hyper - Exposed hyper  -17.164 0.209 Inf -82.107  <.0001
## 
## Degrees-of-freedom method: asymptotic 
## P value adjustment: tukey method for comparing a family of 4 estimates
P2 <- plot(modFullG1_emmeans, by = "hypohyper", comparisons = TRUE) +
  theme_bw() +
  ggtitle("Estimated marginal means of methylation ratio (beta)\n of parents at DMS")+
  theme(legend.position = "none", axis.title.x = element_blank()) +
  scale_x_continuous("Beta value (methylation ratio)", limits = c(47,69.5))

ggarrange(P2, P1, labels = c("A", "B"), ncol = 1, nrow = 2)

4.2.2 Are the nbr of residuals methylation AT PARENTAL DMS different in the 4 G2 trt? (for hypo vs hypermeth)?

length(unique(PM_G1$CpGSite))# 3648 positions
## [1] 3648
PM_G1 %>% dplyr::count(ID)## NB: not all covered in all samples
##      ID    n
## 1   S16 3300
## 2   S17 2842
## 3   S35 2747
## 4   S36 2568
## 5   S52 3037
## 6   S53 3468
## 7   S68 3348
## 8   S69 3500
## 9   S70 2237
## 10  S71 3302
## 11  S72 3479
## 12  S73 2973
## 13  S76 3239
## 14  S77 3232
## 15  S78 3434
## 16  S79 2679
## 17  S94 2564
## 18  S95 1766
## 19 S107 3414
## 20 S108 3230
## 21 S124 3387
## 22 S125 2526
## 23 S143 3097
## 24 S144 2773
length(unique(PM_G2$CpGSite[PM_G2$hypohyper %in% "hypo"]))# 1176 positions hypomethylated upon parental inf
## [1] 1176
length(unique(PM_G2$CpGSite[PM_G2$hypohyper %in% "hyper"]))# 2472 positions hypermethylated upon parental inf
## [1] 2472
myfun <- function(X){
  ## Calculate nbr of CpG hypo/hypermethylated per individual, and nbr of covered CpG:
  X <- X %>% group_by(ID, Treatment, brotherPairID, clutch.ID, Sex) %>%
    dplyr::summarise(ncov = n(),
                     hypoMeth = sum(BetaValue < 0.3),
                     hyperMeth = sum(BetaValue > 0.7)) %>% data.frame()
  # Calculate residuals of nbr of methCpG by nbr of covered CpG
  X$res_Nbr_methCpG_Nbr_coveredCpG_HYPO = residuals(lm(X$hypoMeth ~ X$ncov))
  X$res_Nbr_methCpG_Nbr_coveredCpG_HYPER = residuals(lm(X$hyperMeth ~ X$ncov))
  
  ## Statistical model: is it different in each offspring trt group?
  mod1 <- lmer(res_Nbr_methCpG_Nbr_coveredCpG_HYPO ~ Treatment + (1|brotherPairID/clutch.ID) + (1|Sex),
               data = X, REML = F)
  mod0 <- lmer(res_Nbr_methCpG_Nbr_coveredCpG_HYPO ~ 1 + (1|brotherPairID/clutch.ID) + (1|Sex),
               data = X, REML = F)
  print(lrtest(mod1, mod0))
  
  ## Post-hoc test:
  modhypo <- lmer(res_Nbr_methCpG_Nbr_coveredCpG_HYPO ~ Treatment + (1|brotherPairID/clutch.ID) + (1|Sex),
                  data = X)
  ## pairwise posthoc test
  print(emmeans(modhypo, list(pairwise ~ Treatment), adjust = "tukey"))
  
  mod3 <- lmer(res_Nbr_methCpG_Nbr_coveredCpG_HYPER ~ Treatment + (1|brotherPairID/clutch.ID) + (1|Sex),
               data = X, REML = F)
  mod4 <- lmer(res_Nbr_methCpG_Nbr_coveredCpG_HYPER ~ 1 + (1|brotherPairID/clutch.ID) + (1|Sex),
               data = X, REML = F)
  print(lrtest(mod3, mod4))
  
  ## Post-hoc test:
  modhyper <- lmer(res_Nbr_methCpG_Nbr_coveredCpG_HYPER ~ Treatment + (1|brotherPairID/clutch.ID) + (1|Sex),
                   data = X)
  ## pairwise posthoc test
  print(emmeans(modhyper, list(pairwise ~ Treatment), adjust = "tukey"))
  
  ## Plot
  phypo <- plot(ggpredict(modhypo, terms = c("Treatment")), add.data = TRUE)+
    scale_y_continuous("Residuals of number of hypomethylated methylated \ncytosines on number of cytosines covered") +
    ggtitle("Predicted residuals nbr of hypomethylated CpG")+
    theme_bw()
  
  phyper <- plot(ggpredict(modhyper, terms = c("Treatment")), add.data = TRUE)+
    scale_y_continuous("Residuals of number of hypermethylated methylated \n cytosines on number of cytosines covered") +
    ggtitle("Predicted residuals nbr of hypermethylated CpG")+
    theme_bw()
  return(list(phypo, phyper))
}

listplots <- myfun(X = PM_G2[PM_G2$hypohyper %in% "hypo",])
## Likelihood ratio test
## 
## Model 1: res_Nbr_methCpG_Nbr_coveredCpG_HYPO ~ Treatment + (1 | brotherPairID/clutch.ID) + 
##     (1 | Sex)
## Model 2: res_Nbr_methCpG_Nbr_coveredCpG_HYPO ~ 1 + (1 | brotherPairID/clutch.ID) + 
##     (1 | Sex)
##   #Df  LogLik Df Chisq Pr(>Chisq)
## 1   8 -420.75                    
## 2   5 -423.55 -3 5.605     0.1325
## $`emmeans of Treatment`
##  Treatment  emmean   SE   df lower.CL upper.CL
##  NE_control  -4.01 4.11 16.2   -12.72     4.70
##  NE_exposed  -5.64 4.14 16.3   -14.40     3.11
##  E_control    7.45 4.12 16.1    -1.29    16.18
##  E_exposed    4.61 4.12 16.1    -4.11    13.33
## 
## Degrees-of-freedom method: kenward-roger 
## Confidence level used: 0.95 
## 
## $`pairwise differences of Treatment`
##  1                       estimate   SE    df t.ratio p.value
##  NE_control - NE_exposed     1.63 2.55 93.08   0.640  0.9188
##  NE_control - E_control    -11.46 5.83  8.47  -1.966  0.2722
##  NE_control - E_exposed     -8.62 5.82  8.47  -1.482  0.4875
##  NE_exposed - E_control    -13.09 5.86  8.53  -2.234  0.1893
##  NE_exposed - E_exposed    -10.25 5.85  8.53  -1.754  0.3559
##  E_control - E_exposed       2.84 2.50 92.53   1.135  0.6692
## 
## Degrees-of-freedom method: kenward-roger 
## P value adjustment: tukey method for comparing a family of 4 estimates 
## 
## Likelihood ratio test
## 
## Model 1: res_Nbr_methCpG_Nbr_coveredCpG_HYPER ~ Treatment + (1 | brotherPairID/clutch.ID) + 
##     (1 | Sex)
## Model 2: res_Nbr_methCpG_Nbr_coveredCpG_HYPER ~ 1 + (1 | brotherPairID/clutch.ID) + 
##     (1 | Sex)
##   #Df  LogLik Df  Chisq Pr(>Chisq)
## 1   8 -419.75                     
## 2   5 -422.58 -3 5.6616     0.1293
## $`emmeans of Treatment`
##  Treatment  emmean   SE   df lower.CL upper.CL
##  NE_control   3.97 4.18 16.1    -4.88    12.82
##  NE_exposed   5.61 4.20 16.2    -3.29    14.51
##  E_control   -7.52 4.19 16.0   -16.40     1.35
##  E_exposed   -4.53 4.18 16.1   -13.39     4.33
## 
## Degrees-of-freedom method: kenward-roger 
## Confidence level used: 0.95 
## 
## $`pairwise differences of Treatment`
##  1                       estimate   SE    df t.ratio p.value
##  NE_control - NE_exposed    -1.64 2.51 93.04  -0.651  0.9148
##  NE_control - E_control     11.50 5.92  8.37   1.942  0.2815
##  NE_control - E_exposed      8.50 5.91  8.37   1.438  0.5112
##  NE_exposed - E_control     13.13 5.95  8.43   2.208  0.1971
##  NE_exposed - E_exposed     10.14 5.94  8.43   1.707  0.3771
##  E_control - E_exposed      -3.00 2.47 92.51  -1.216  0.6185
## 
## Degrees-of-freedom method: kenward-roger 
## P value adjustment: tukey method for comparing a family of 4 estimates
## NOT significant
annotate_figure(ggarrange(listplots[[1]], listplots[[2]],ncol = 2, nrow = 1),
                top = text_grob("Parental DMS hypomethylated upon infection, in offspring"))

listplots <- myfun(X = PM_G2[PM_G2$hypohyper %in% "hyper",])
## Likelihood ratio test
## 
## Model 1: res_Nbr_methCpG_Nbr_coveredCpG_HYPO ~ Treatment + (1 | brotherPairID/clutch.ID) + 
##     (1 | Sex)
## Model 2: res_Nbr_methCpG_Nbr_coveredCpG_HYPO ~ 1 + (1 | brotherPairID/clutch.ID) + 
##     (1 | Sex)
##   #Df  LogLik Df  Chisq Pr(>Chisq)   
## 1   8 -476.63                        
## 2   5 -482.69 -3 12.123   0.006974 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## $`emmeans of Treatment`
##  Treatment  emmean   SE   df lower.CL upper.CL
##  NE_control  13.94 5.13 17.0     3.12    24.75
##  NE_exposed   8.48 5.19 17.2    -2.46    19.42
##  E_control   -9.77 5.15 16.8   -20.66     1.11
##  E_exposed  -12.95 5.14 16.9   -23.79    -2.11
## 
## Degrees-of-freedom method: kenward-roger 
## Confidence level used: 0.95 
## 
## $`pairwise differences of Treatment`
##  1                       estimate   SE   df t.ratio p.value
##  NE_control - NE_exposed     5.46 4.45 93.8   1.225  0.6124
##  NE_control - E_control     23.71 7.28 10.3   3.257  0.0353
##  NE_control - E_exposed     26.88 7.26 10.3   3.701  0.0172
##  NE_exposed - E_control     18.25 7.36 10.5   2.481  0.1209
##  NE_exposed - E_exposed     21.43 7.33 10.5   2.923  0.0598
##  E_control - E_exposed       3.17 4.37 93.0   0.726  0.8864
## 
## Degrees-of-freedom method: kenward-roger 
## P value adjustment: tukey method for comparing a family of 4 estimates 
## 
## Likelihood ratio test
## 
## Model 1: res_Nbr_methCpG_Nbr_coveredCpG_HYPER ~ Treatment + (1 | brotherPairID/clutch.ID) + 
##     (1 | Sex)
## Model 2: res_Nbr_methCpG_Nbr_coveredCpG_HYPER ~ 1 + (1 | brotherPairID/clutch.ID) + 
##     (1 | Sex)
##   #Df  LogLik Df  Chisq Pr(>Chisq)   
## 1   8 -477.24                        
## 2   5 -483.28 -3 12.081    0.00711 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## $`emmeans of Treatment`
##  Treatment  emmean   SE   df lower.CL upper.CL
##  NE_control -14.11 5.18 17.0   -25.04    -3.18
##  NE_exposed  -8.53 5.24 17.2   -19.58     2.52
##  E_control    9.95 5.21 16.8    -1.05    20.95
##  E_exposed   12.96 5.19 16.9     2.00    23.92
## 
## Degrees-of-freedom method: kenward-roger 
## Confidence level used: 0.95 
## 
## $`pairwise differences of Treatment`
##  1                       estimate   SE   df t.ratio p.value
##  NE_control - NE_exposed    -5.58 4.47 93.8  -1.247  0.5989
##  NE_control - E_control    -24.06 7.36 10.3  -3.269  0.0348
##  NE_control - E_exposed    -27.07 7.34 10.3  -3.687  0.0177
##  NE_exposed - E_control    -18.48 7.43 10.4  -2.486  0.1204
##  NE_exposed - E_exposed    -21.49 7.41 10.4  -2.901  0.0622
##  E_control - E_exposed      -3.01 4.39 93.0  -0.686  0.9021
## 
## Degrees-of-freedom method: kenward-roger 
## P value adjustment: tukey method for comparing a family of 4 estimates
## Treatment SIGNIFICANT in both excess hypo/hyper methylation **

# $`pairwise differences of Treatment`
## HYPO
# 1                       estimate   SE   df t.ratio p.value
# NE_control - E_control     23.71 7.28 10.3   3.257  0.0353
# NE_control - E_exposed     26.88 7.26 10.3   3.701  0.0172

## HYPER
# 1                       estimate   SE   df t.ratio p.value
# NE_control - E_control    -24.06 7.36 10.3  -3.269  0.0348
# NE_control - E_exposed    -27.07 7.34 10.3  -3.687  0.0177

annotate_figure(ggarrange(listplots[[1]], listplots[[2]],ncol = 2, nrow = 1),
                top = text_grob("Parental DMS hypermethylated upon infection, in offspring"))

-> The beta values in parentalDMS in offspring follow the parental pattern hypo/hyper methylated upon infection